On commence par charger quelques packages, dont phyloseq et DESeq2
#install.packages("knitr")
library(knitr)
library(phyloseq)
library(DT)
library(VennDiagram)
library(tidyverse)
library(vegan)
library(reshape2)
library(permute)
library(lattice)
library(phangorn)
library(parallel)
library(gridBase)
library(biomformat)
library(plotly)
library(ggpubr)
library(openxlsx)
library(tidyr)
library(paletteer)
library(viridis)
library(dplyr)
library(gridExtra)
library(tidyr)
library(ggplot2)
library(plotly)
if (Sys.info()["user"] == 'mmariadasso') {
source("~/Documents/Custom_Functions.R")
} else {
source("https://raw.githubusercontent.com/mahendra-mariadassou/phyloseq-extended/master/load-extra-functions.R")
}
## Custom package for extra graphical functions
# remotes::install_github("mahendra-mariadassou/phyloseq-extended", ref = "dev")
## Custom package to explore multi-affiliated taxa
# remotes::install_github("mahendra-mariadassou/affiliationExplorer")
# affiliationExplorer::run_app()
library(phyloseq.extended)
Puis on importe les données
frogsBiom <- "/Users/NIPPONNOYE/Documents/Teletravail_20201215/Biom Template Script/LECIMETA.biom1"
#frogsBiom <- "C:/Users/mmonnoye/Documents/Biom/LECIMETA/LECIMETA.biom1"
frogs.data.biom <- import_frogs(frogsBiom, taxMethod = "blast")
metadata<-read.table("/Users/NIPPONNOYE/Documents/Teletravail_20201215/Biom Template Script/SamplenameLECIMETA.txt",row.names=1, header=T)
#metadata<-read.table("C:/Users/mmonnoye/Documents/Biom/LECIMETA/SamplenameLECIMETA.txt",row.names=1, header=T)
sample_data(frogs.data.biom)<-metadata
sample_names(frogs.data.biom) <- get_variable(frogs.data.biom, "Name")
frogs.data<-frogs.data.biom
treefile<- read.tree("/Users/NIPPONNOYE/Documents/Teletravail_20201215/Biom Template Script/LECIMETA_tree.nhx")
#treefile<- read.tree("C:/Users/mmonnoye/Documents/Biom/LECIMETA/LECIMETA_tree.nhx") #if have treefile
phy_tree(frogs.data) <- treefile
## Before correction / visualisation affichage noms taxonomic
#tax_table(frogs.data)[1, ]
tax_table(frogs.data) <- gsub("\"", "", tax_table(frogs.data))
tax_table(frogs.data) <- gsub("unknown.*", "Unknown", tax_table(frogs.data))
## After correction / visualisation affichage noms taxonomic
#tax_table(frogs.data)[1, ]
On dispose de l’object phyloseq suivant:
frogs.data
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 270 taxa and 107 samples ]
## sample_data() Sample Data: [ 107 samples by 5 sample variables ]
## tax_table() Taxonomy Table: [ 270 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 270 tips and 269 internal nodes ]
qui contient les métadonnées suivantes:
sample_variables(frogs.data)
## [1] "Name" "Reads" "Group" "Time_point" "Groupe"
# data <- readxl::read_excel("/Users/NIPPONNOYE/Documents/Teletravail_20201201/ODORANTS/Graph Mahendra juillet 2019/Tableau de synthese preference odeurs.xlsx") %>%
# mutate(ID = factor(ID))
# # Affichage du tableau
# DT::datatable(data, class = "compact")
## select only samples which read count higher than 10000
# subset_samples(frogs.data, sample_sums(frogs.data) > 10000 & Time_point == "TF")
## Remove samples T2_8, T2_68
# subset_samples(frogs.data, !(sample_names(frogs.data) %in% c("T2_8", "T2_68")) )
#frogs.data <- subset_samples(frogs.data, Time_point == "TF")
frogs.data <- subset_samples(frogs.data, Time_point == "TF" & Group %in% c("A","B","C","D"))
# frogs.data <- subset_samples(frogs.data, (Type == "A/J") & (Statut == "HFD") & (Period %in% c("T0", "T4", "Tf")))
# ## On renomme la variable utilisee pour l'abondance differentielle (ici "Type") en "Groupe"
# sample_data(data)$Groupe <- sample_data(data)$Type
Le “Group” correspond aux 6 groupes de l’étude: - Chow
High fat control faible en ALA (acide alpha-linoléinique ; la partie alipidique des régimes HF étant à base d’amidon, caséines…)
High fat avec ALA uniquement via des huiles (tout l’ALA est apporté par des TG)
High fat avec ALA, dont les lipides incluent 10% de lécithine de colza (ainsi vectrice d’~10% de l’ALA contenu dans le régime, sous forme de PL)
High fat avec ALA, dont les lipides incluent 20% de lécithine de colza ( ainsi vectrice d’~20% de l’ALA contenu dans le régime, sous forme de PL )
High fat avec ALA, dont les lipides incluent 10% de lécithine de soja ( ainsi vectrice d’~20% de l’ALA contenu dans le régime, sous forme de PL )
table(get_variable(frogs.data, "Group"))
##
## A B C D
## 6 6 6 6
Le “Groupe” regroupe les échantillons par type de régime et par temps
sample_data(frogs.data) %>% as("data.frame") %>%
count(Group, Time_point)
## Group Time_point n
## 1 A TF 6
## 2 B TF 6
## 3 C TF 6
## 4 D TF 6
# table(get_variable(frogs.data, "Groupe"))
On compte le nombre de reads dans chaque type de régime et à chaque point de temps:
sample_data(frogs.data) %>% as("data.frame") %>%
count(Group, Time_point, wt = Reads, name = "Total_read_count")
## Group Time_point Total_read_count
## 1 A TF 221967
## 2 B TF 204860
## 3 C TF 188544
## 4 D TF 195215
Le “Time_point” correspond aux temps: T0, T2 et TF
table(get_variable(frogs.data, "Time_point"))
##
## TF
## 24
Structure de la table d’OTU (exemple pour 6 OTUs):
#Count phyla by sample
head(otu_table(frogs.data))
## OTU Table: [6 taxa and 24 samples]
## taxa are rows
## TF_25 TF_26 TF_27 TF_31 TF_32 TF_33 TF_37 TF_38 TF_39 TF_43 TF_44
## Cluster_30 49 23 467 137 93 58 114 105 131 76 52
## Cluster_42 0 0 1 0 0 0 4 0 6 0 8
## Cluster_8 1 0 2 0 1 0 5 8 47 1 67
## Cluster_28 110 57 527 95 201 98 214 156 128 122 57
## Cluster_15 92 72 106 352 102 116 183 140 412 126 112
## Cluster_12 26 14 40 12 23 17 219 434 567 353 258
## TF_45 TF_49 TF_50 TF_51 TF_55 TF_56 TF_57 TF_61 TF_62 TF_63 TF_67
## Cluster_30 39 69 75 97 17 48 48 41 17 96 22
## Cluster_42 2 0 4 0 8 1 3 0 5 1 1
## Cluster_8 2 5 10 0 27 3 3 0 9 9 1
## Cluster_28 55 60 81 86 24 79 73 54 23 79 31
## Cluster_15 104 85 98 49 55 55 68 90 162 63 11
## Cluster_12 115 436 868 283 287 851 1521 612 256 244 85
## TF_68 TF_69
## Cluster_30 41 38
## Cluster_42 3 2
## Cluster_8 2 2
## Cluster_28 33 32
## Cluster_15 36 54
## Cluster_12 90 285
#ou ceci, les 5 premiers échantillons
#phyloseq::otu_table(frogs.data)[1:5, 1:5]
Structure Taxonomy Table (exemple pour 6 OTUs):
head(tax_table(frogs.data))
## Taxonomy Table: [6 taxa by 7 taxonomic ranks]:
## Kingdom Phylum Class Order
## Cluster_30 "Bacteria" "Bacteroidetes" "Bacteroidia" "Bacteroidales"
## Cluster_42 "Bacteria" "Bacteroidetes" "Bacteroidia" "Bacteroidales"
## Cluster_8 "Bacteria" "Bacteroidetes" "Bacteroidia" "Bacteroidales"
## Cluster_28 "Bacteria" "Bacteroidetes" "Bacteroidia" "Bacteroidales"
## Cluster_15 "Bacteria" "Bacteroidetes" "Bacteroidia" "Bacteroidales"
## Cluster_12 "Bacteria" "Bacteroidetes" "Bacteroidia" "Bacteroidales"
## Family Genus Species
## Cluster_30 "Muribaculaceae" "Unknown" "Unknown"
## Cluster_42 "Muribaculaceae" "Unknown" "Unknown"
## Cluster_8 "Muribaculaceae" "Unknown" "Unknown"
## Cluster_28 "Muribaculaceae" "Unknown" "Unknown"
## Cluster_15 "Muribaculaceae" "Unknown" "Unknown"
## Cluster_12 "Muribaculaceae" "Unknown" "Unknown"
Structure tableau des metadonnées:
head(sample_data(frogs.data))
## Name Reads Group Time_point Groupe
## TF_25 TF_25 34511 A TF TF_A
## TF_26 TF_26 36503 A TF TF_A
## TF_27 TF_27 42363 A TF TF_A
## TF_31 TF_31 33042 A TF TF_A
## TF_32 TF_32 42068 A TF TF_A
## TF_33 TF_33 33480 A TF TF_A
p<-plot_bar(frogs.data)
plot(p)
#ggplotly(p) #pour avoir figure en html
p <- plot_bar(frogs.data, fill = "Phylum")
plot(p)
#Plot des echantillons couleurs par Family
p <- plot_bar(frogs.data, fill = "Family")
plot(p)
On va commencer par filtrer les OTUs sur la base de leur prévalence: on ne garde que les OTUs qui sont présents dans au moins 50% des échantillons d’au moins un groupe.
# test_function <- function(x) { x > 0 }
# taxa.1 <- genefilter_sample(subset_samples(frogs.data, Group == "Chow"), test_function, A = 3)
# taxa.2 <- genefilter_sample(subset_samples(frogs.data, Group == "HFPalm"), test_function, A = 3)
# taxa.3 <- genefilter_sample(subset_samples(frogs.data, Group == "A"), test_function, A = 3)
# taxa.4 <- genefilter_sample(subset_samples(frogs.data, Group == "B"), test_function, A = 3)
# taxa.5 <- genefilter_sample(subset_samples(frogs.data, Group == "C"), test_function, A = 3)
# taxa.6 <- genefilter_sample(subset_samples(frogs.data, Group == "D"), test_function, A = 3)
# venn.plot <- venn.diagram(list("A" = which(taxa.1),
# "B" = which(taxa.2),"C" = which(taxa.3),
# "D"= which(taxa.4)),
# filename = NULL, fill = c("#FE642E", "#F7D358", "#5FB404", "#0489B1"), lty = "blank",cex = 1.5, cat.cex = 1.5, cat.col = c("#FE642E", "#F7D358", "#5FB404", "#0489B1"))
# grid.newpage(); grid.draw(venn.plot)
Si pas de venn diagramm après le filtre :
prevalence_data <- estimate_prevalence(physeq = frogs.data, group = "Group")
## Keep only OTUs with prevalence > 0.5 in at least one group
taxa_to_keep <- filter(prevalence_data, prevalence >= 0.5) %>% pull(otu)
Répartition des OTU par phylum
#Get count of phyla
table(phyloseq::tax_table(frogs.data)[, "Phylum"])
##
## Bacteroidetes Firmicutes Proteobacteria Tenericutes
## 32 233 4 1
Boxplot Abondance
correct.order <- c("A", "B","C", "D")
sample_data(frogs.data)$Group <- factor(sample_data(frogs.data)$Group,
levels = correct.order)
phy<-tax_glom(frogs.data,taxrank="Phylum")
plotdata<-psmelt(phy)
p<-ggplot(plotdata,aes(x = Group,y=Abundance,fill=Group)) + geom_boxplot(aes(group=Group))+facet_wrap(~Phylum, scales = "free_y",ncol = 1)
#,color=Group
plot(p)
Relative Abondance par échantillon
#install.packages("openxlsx")
#library("openxlsx")
#Convert to relative abundance
data_rel_abund = phyloseq::transform_sample_counts(frogs.data, function(x) 100 * x/sum(x))
#Agglomerate to phylum-level and rename
data_phylum <- phyloseq::tax_glom(data_rel_abund, "Phylum")
phyloseq::taxa_names(data_phylum) <- phyloseq::tax_table(data_phylum)[, "Phylum"]
phyloseq::otu_table(data_phylum)
## OTU Table: [4 taxa and 24 samples]
## taxa are rows
## TF_25 TF_26 TF_27 TF_31 TF_32 TF_33
## Bacteroidetes 30.88340 31.40680 56.022300 36.85098 34.657553 44.99479
## Firmicutes 39.36064 49.04111 40.399326 38.09747 59.190579 40.27261
## Tenericutes 0.00000 0.00000 0.000000 0.00000 0.000000 0.00000
## Proteobacteria 29.75596 19.55208 3.578374 25.05155 6.151868 14.73261
## TF_37 TF_38 TF_39 TF_43 TF_44 TF_45
## Bacteroidetes 52.236341 55.345061 45.31785 63.381344 57.973487 61.417934
## Firmicutes 38.459329 37.993130 41.74556 29.665938 35.057094 28.591374
## Tenericutes 0.000000 0.000000 0.00000 0.000000 0.000000 0.000000
## Proteobacteria 9.304329 6.661809 12.93659 6.952718 6.969419 9.990692
## TF_49 TF_50 TF_51 TF_55 TF_56 TF_57
## Bacteroidetes 56.526006 52.35453 59.10607 56.70439 59.81752 67.918455
## Firmicutes 35.852143 36.60549 34.26729 32.63955 30.07169 24.177396
## Tenericutes 0.000000 0.00000 0.00000 0.00000 0.00000 0.000000
## Proteobacteria 7.621851 11.03998 6.62664 10.65605 10.11079 7.904149
## TF_61 TF_62 TF_63 TF_67 TF_68 TF_69
## Bacteroidetes 47.398718 34.52678 59.592410 50.155682 57.073735 37.41912
## Firmicutes 46.087147 53.60235 33.573436 42.293721 33.996003 33.27642
## Tenericutes 0.000000 0.00000 0.000000 0.000000 0.000000 0.00000
## Proteobacteria 6.514136 11.87087 6.834153 7.550597 8.930262 29.30446
rel_abun<-phyloseq::otu_table(data_phylum)
#write.xlsx(rel_abun,"Relative Abundance Phylum TF.xlsx",row.names=TRUE,col.names = TRUE, sep="\t",dec=",",na=" ")
correct.order <- c("A", "B","C", "D")
sample_data(frogs.data)$Group <- factor(sample_data(frogs.data)$Group,
levels = correct.order)
#levels(get_variable(frogs.data.merged, "Groupe"))
p <- plot_composition(frogs.data, "Kingdom", "Bacteria", "Phylum",numberOfTaxa = 5, fill = "Phylum")
p<- p + facet_wrap(~Group, scales = "free_x", nrow = 1) # + facet_wrap(Groupe~Statut, scales = "free_x", nrow = 1)
plot(p)
Changement de titre / Personnalisation des couleurs / Supprimer noms axe des x
family.palette<- c( "Bacteroidetes" = "#CBAC45",
"Firmicutes" = "#3DCC98",
"Proteobacteria" = "#48B3E8",
"Tenericutes" = "#F28372")
p <- plot_composition(frogs.data, "Kingdom", "Bacteria", "Phylum",numberOfTaxa = 4, fill = "Phylum")+ ggtitle("V1-V3") + scale_color_manual(values = family.palette) + scale_fill_manual(values = family.palette)
p <- p + facet_wrap(~Group, scales = "free_x", nrow = 1) + theme(axis.text.x = element_blank(), axis.ticks = element_blank())
#p <- p + facet_wrap(Groupe~Statut, scales = "free_x", nrow = 1)
plot(p)
#ggsave(p, file = "/Users/NIPPONNOYE/Documents/Teletravail_20201211/V1V3 _ 8 sample/Phylum Composition.png", width = 6, height = 5,dpi=300)
Test de Kruskal (Anova non paramétrique) pour chaque Phylum pour tester si les abondances sont similaires (pour ce Phylum) entre les différents groupes.
# depth <- sample_sums(frogs.data)[1]
data.phylum <- frogs.data %>%
transform_sample_counts(function(x) { x / sum(x)}) %>% ## transform counts to proportions
fast_tax_glom(taxrank = "Phylum") %>%
psmelt() %>%
mutate(Group = factor(Group, labels = c("A", "B","C", "D")))
data.test <- compare_means(Abundance ~ Group, data = data.phylum, method = "kruskal", group.by = "Phylum")
data.test
## # A tibble: 4 x 7
## Phylum .y. p p.adj p.format p.signif method
## <chr> <chr> <dbl> <dbl> <chr> <chr> <chr>
## 1 Bacteroidetes Abundance 0.0119 0.036 0.012 * Kruskal-Wallis
## 2 Firmicutes Abundance 0.0314 0.063 0.031 * Kruskal-Wallis
## 3 Proteobacteria Abundance 0.780 0.78 0.780 ns Kruskal-Wallis
## 4 Tenericutes Abundance NaN NaN NA ? Kruskal-Wallis
On change les noms de Phylum dans notre objet phyloseq pour y rajouter les étoiles de significativité.
new.phylum <- c("Firmicutes" = "Firmicutes (*)",
"Proteobacteria" = "Proteobacteria",
"Bacteroidetes" = "Bacteroidetes (*)",
"Tenericutes" = "Tenericutes"
)
frogs.data.2 <- frogs.data
tax_table(frogs.data.2)[ , "Phylum"] <- new.phylum[as(tax_table(frogs.data), "matrix")[ , "Phylum"]]
sample_data(frogs.data.2)$Group <- factor(sample_data(frogs.data.2)$Group, labels = c("A", "B","C", "D"))
On va enlever les Tenericutes car seulement 1 OTU.
my.palette <- c("#FC8385", "#71C749", "#55CDD3")
#Retirer Tenericutes
p1 <- plot_composition(frogs.data.2 %>% subset_taxa(Phylum != "Tenericutes"),
"Kingdom", "Bacteria", "Phylum", fill = "Phylum", facet_grid = "~Group", x = "Name", numberOfTaxa = 6) +
scale_fill_manual(name = NULL, values = my.palette) +
scale_color_manual(name = NULL, values = my.palette) +
theme(legend.position = "bottom", axis.text.x = element_blank())+
theme(legend.text = element_text(size=12)) #taille texte legende
plot(p1)
Test Dunn Post-Hoc sur les résultats du Kruskall Wallis de l’impact du group sur l’abondance par Phylum
filter(data.phylum, Phylum == "Bacteroidetes") %>%
dunn.test::dunn.test(x = .$Abundance, g = .$Group, method = "bh")
##
## Comparison of x by group
## (Benjamini-Hochberg)
## Col Mean-|
## Row Mean | A B C
## ---------+---------------------------------
## B | -2.531139
## | 0.0171*
## |
## C | -3.021037 -0.489897
## | 0.0076* 0.3121
## |
## D | -1.306394 1.224744 1.714642
## | 0.1436 0.1324 0.0864
##
## alpha = 0.05
## Reject Ho if p <= alpha/2
filter(data.phylum, Phylum == "Firmicutes") %>%
dunn.test::dunn.test(x = .$Abundance, g = .$Group, method = "bh")
##
## Comparison of x by group
## (Benjamini-Hochberg)
## Col Mean-|
## Row Mean | A B C
## ---------+---------------------------------
## B | 2.000416
## | 0.0682
## |
## C | 2.816913 0.816496
## | 0.0145* 0.2071
## |
## D | 1.061445 -0.938971 -1.755467
## | 0.2164 0.2086 0.0792
##
## alpha = 0.05
## Reject Ho if p <= alpha/2
filter(data.phylum, Phylum == "Proteobacteria") %>%
dunn.test::dunn.test(x = .$Abundance, g = .$Group, method = "bh")
##
## Comparison of x by group
## (Benjamini-Hochberg)
## Col Mean-|
## Row Mean | A B C
## ---------+---------------------------------
## B | 0.979795
## | 0.9816
## |
## C | 0.694022 -0.285773
## | 0.4877 0.5813
## |
## D | 0.775671 -0.204124 0.081649
## | 0.6569 0.5030 0.4675
##
## alpha = 0.05
## Reject Ho if p <= alpha/2
Relative abondance par groupe
#Convert to relative abundance
data_rel_abund = phyloseq::transform_sample_counts(frogs.data, function(x) 100 * x/sum(x))
#Agglomerate to phylum-level and rename
data_phylum <- phyloseq::tax_glom(data_rel_abund, "Phylum")
ps1 <- merge_samples(data_phylum, "Group")
data_phylum_2 <- transform_sample_counts(ps1, function(x) 100 * x/sum(x))
phyloseq::taxa_names(data_phylum_2) <- phyloseq::tax_table(data_phylum_2)[, "Phylum"]
phyloseq::otu_table(data_phylum_2)
## OTU Table: [4 taxa and 4 samples]
## taxa are columns
## Bacteroidetes Firmicutes Tenericutes Proteobacteria
## A 39.13597 44.39362 0 16.470406
## B 55.94534 35.25207 0 8.802593
## C 58.73783 32.26893 0 8.993243
## D 47.69441 40.47151 0 11.834080
rel_abun<-phyloseq::otu_table(data_phylum_2)
#write.xlsx(rel_abun,"Relative Abondance Phylum moyenne TF.xlsx",row.names=TRUE,col.names = TRUE, sep="\t",dec=",",na=" ")
Boxplot Relative Abundance en %
#Melt and plot
phyloseq::psmelt(data_phylum) %>%
ggplot(data = ., aes(x = Group, y = Abundance,color=Phylum)) +
geom_boxplot(outlier.shape = NA) +
geom_jitter(aes(color = Phylum), height = 0, width = .2) +
labs(x = "", y = "Relative Abundance (%)") +
facet_wrap(~ Phylum, scales = "free",ncol = 1)
Retirer Tenericutes
frogs.data.merged<-merge_samples(frogs.data,group="Group")
## Restaurer les niveaux du facteur utilisé pour fusionner les échantillons
sample_data(frogs.data.merged)$Group <- sample_names(frogs.data.merged)
correct.order <- c("A", "B","C", "D")
sample_data(frogs.data.merged)$Group <- factor(sample_data(frogs.data.merged)$Group,
levels = correct.order)
#levels(get_variable(frogs.data.merged, "Groupe"))
p <- plot_composition(frogs.data.merged %>% subset_taxa(Phylum != "Tenericutes"), "Kingdom", "Bacteria", "Phylum",numberOfTaxa = 5, fill = "Phylum")
p <- p + facet_wrap(~Group, scales = "free_x", nrow = 1)
p <- p + ggtitle("Phylum Composition TF") ## Changer le titre
plot(p)
#ggsave(p, file = "C:/Users/mmonnoye/Documents/Biom/Zahra/figure_example_4x4.png", width = 3, height = 4)
frogs.data.merged<-merge_samples(frogs.data,group="Group")
## Restaurer les niveaux du facteur utilisé pour fusionner les échantillons
sample_data(frogs.data.merged)$Group <- sample_names(frogs.data.merged)
positions <- c("A", "B","C", "D")
p <- plot_composition(frogs.data.merged, "Kingdom", "Bacteria", "Phylum",numberOfTaxa = 5, fill = "Phylum")
#p <- p + facet_wrap(~Group, scales = "free_x", nrow = 1)
p <- p + ggtitle("Phylum")+theme(panel.background=element_rect(fill="transparent",colour=NA)) + ## Changer le titre + Changer arrière plan
#theme(legend.position = "bottom",legend.box = "vertical")
theme(legend.title = element_text(color = "black", size = 14),legend.text = element_text(color = "black", size = 10))+ #legende taille noms
theme(plot.margin=unit(c(0.1,0.1,6,0.1),"cm")) + theme(legend.position=c(0.50,-0.50))+
# Modifier elements titre et nom des axes
theme(plot.title = element_text(color="Black", size=16, face="bold.italic",hjust=0.5),
axis.title.y = element_text(color="black", size=14))+
# Modifier graduation des axes
theme(axis.text.x = element_text(face="bold", color="black", size=13),
axis.text.y = element_text( color="black", size=12))+ #, angle=45, face="bold"
scale_x_discrete(limits = positions) # changer ordre bars selon l'argument position
plot(p)
#ggsave(p, file = "/Users/NIPPONNOYE/Documents/2020 Boulot Mag/2020 Télétravail/Teletravail_20201001/Phylum.eps", width = 3, height = 6)
Titre et sous titre / Echelle en %
frogs.data.merged<-merge_samples(frogs.data,group="Group")
## Restaurer les niveaux du facteur utilis? pour fusionner les ?chantillons
sample_data(frogs.data.merged)$Group <- sample_names(frogs.data.merged)
correct.order <- c("A", "B","C", "D")
sample_data(frogs.data.merged)$Group <- factor(sample_data(frogs.data.merged)$Group,
levels = correct.order)
#levels(get_variable(frogs.data.merged, "Groupe"))
p <- plot_composition(frogs.data.merged, "Kingdom", "Bacteria", "Phylum",numberOfTaxa = 6, fill = "Phylum")
p <- p + facet_wrap(~Group, scales = "free_x", nrow = 1)+
scale_y_continuous(label = scales::percent)
p <- p + labs(x = "Samples", y = "Relative abundance (%)",
title = "Phylum Composition TF",
subtitle = "6 Top Phylum")
plot(p)
#ggsave(p, file = "C:/Users/mmonnoye/Documents/Phylum Composition.png", width = 4, height = 4)
Boxplot Phylum avec stats
On va se focaliser sur les 4 Phylum et représenter leurs abondances dans les différents groupe sous forme de boxplot. On va utiliser A comme groupe de référence et le comparer à tout les autres avec un test de wilcoxon pour voir desquels il diffère. Le nombre d’étoiles codent le niveau de significativité, les comparaisons non-significatives ne sont pas indiqués
## Select only some families
families <- c("Bacteroidetes", "Firmicutes", "Proteobacteria","Tenericutes")
phy <- frogs.data %>% subset_taxa(Phylum %in% families) %>% tax_glom(taxrank="Phylum") #agglomerate at family level
## Transform count to relative abundances
depth <- sample_sums(frogs.data)[1]
plotdata<-psmelt(phy) %>%
mutate(Abundance = Abundance / depth,
# Phylum = factor(Phylum, levels = families), ## set family order to the one specified in `families` rather than the alphabetical one.
Group = factor(Group, labels = c("A", "B","C", "D"))) ## change treatment names to the paper ones
p <- ggplot(plotdata,aes(x = Group,y=Abundance, color = Group, Group = Group)) +
stat_boxplot(geom = "errorbar", width = 0.5) +
geom_boxplot(outlier.alpha = 1,
outlier.size = 0.8) +
facet_wrap(~Phylum, scales = "free_y", ncol = 4) +
scale_color_manual(values = c("A" = "#09C51F",
"B" = "#8BE9F9",
"C" = "#0995C5",
"D" = "#094DC5"),
guide = "none") +
# theme_classic() + ## fond blanc
labs(x = NULL) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 15))
## Compare reference level Chow to all other levels using a wilcoxon test and adjust p-values using the holm correction.
p <- p + stat_compare_means(aes(label = ..p.signif..), method = "wilcox.test", p.adjust.method = "holm", ref.group = "Chow", hide.ns = T,
label.y.npc = c(0.90),
size = 7,
fontface = "bold")
plot(p)
## Représentation des communautés au niveau Family
correct.order <- c("A", "B","C", "D")
sample_data(frogs.data)$Group <- factor(sample_data(frogs.data)$Group,
levels = correct.order)
#levels(get_variable(frogs.data.merged, "Groupe"))
p <- plot_composition(frogs.data, "Kingdom", "Bacteria", "Family",numberOfTaxa = 12, fill = "Family")
p <- p + facet_wrap(~Group, scales = "free_x", nrow = 1)
plot(p)
Relative abondance par Family
#Convert to relative abundance
data_rel_abund = phyloseq::transform_sample_counts(frogs.data, function(x) 100 * x/sum(x))
#Agglomerate to phylum-level and rename
data_phylum <- phyloseq::tax_glom(data_rel_abund, "Family")
ps1 <- merge_samples(data_phylum, "Group")
data_phylum_2 <- transform_sample_counts(ps1, function(x) 100 * x/sum(x))
phyloseq::taxa_names(data_phylum_2) <- phyloseq::tax_table(data_phylum_2)[, "Family"]
phyloseq::otu_table(data_phylum_2)
## OTU Table: [19 taxa and 4 samples]
## taxa are columns
## Muribaculaceae Tannerellaceae Prevotellaceae Bacteroidaceae Marinifilaceae
## A 7.511593 0.6872592 1.102289 5.813509 2.485080
## B 11.859320 1.5912746 3.958224 5.052995 5.560867
## C 16.188255 0.4286485 6.370489 6.410393 4.329306
## D 9.751098 0.3719997 4.922359 5.455489 4.045521
## Rikenellaceae Lachnospiraceae Peptostreptococcaceae Family XIII
## A 21.53624 27.99114 0.1150514 0.13689625
## B 27.92266 21.97917 0.1359102 0.09446598
## C 25.01074 22.17903 0.1403283 0.04089012
## D 23.14794 28.37958 0.5832495 0.08085836
## Clostridiaceae 1 Anaeroplasmataceae Erysipelotrichaceae Lactobacillaceae
## A 0.03496661 0 2.8539398 0.3516066
## B 0.04968114 0 0.3481496 0.7941359
## C 0.17255032 0 0.2781379 0.5096389
## D 0.08402799 0 0.6920717 2.0834285
## Christensenellaceae Ruminococcaceae Clostridiales vadinBB60 group
## A 0.006407880 12.876443 0.02717136
## B 0.025499817 10.315114 1.50994356
## C 0.028894354 7.089756 1.82969743
## D 0.009367567 7.651599 0.90732859
## Desulfovibrionaceae Burkholderiaceae Enterobacteriaceae
## A 15.886803 0.53515495 0.04844804
## B 8.637055 0.10078781 0.06475014
## C 8.839571 0.08640673 0.06726520
## D 11.506948 0.16843367 0.15869848
rel_abun<-phyloseq::otu_table(data_phylum_2)
#write.xlsx(rel_abun,"Relative Abondance Family moyenne TF.xlsx",row.names=TRUE,col.names = TRUE, sep="\t",dec=",",na=" ")
Chunk pour meilleure qualité de graph
frogs.data.merged<-merge_samples(frogs.data,group="Group")
## Restaurer les niveaux du facteur utilisé pour fusionner les échantillons
sample_data(frogs.data.merged)$Group <- sample_names(frogs.data.merged)
correct.order <- c("A", "B","C", "D")
sample_data(frogs.data.merged)$Group <- factor(sample_data(frogs.data.merged)$Group,
levels = correct.order)
#levels(get_variable(frogs.data.merged, "Groupe"))
p <- plot_composition(frogs.data.merged, "Kingdom", "Bacteria", "Family",numberOfTaxa = 6, fill = "Family")
p <- p + facet_wrap(~Group, scales = "free_x", nrow = 1)
p <- p + ggtitle("Family Composition TF (6 top Family)") ## Changer le titre
plot(p)
#ggsave(p, file = "C:/Users/mmonnoye/Documents/Biom/Zahra/figure_example_4x4.png", width = 3, height = 4)
Boxplot Family avec stats
On va se focaliser sur quelques familles et représenter leurs abondances dans les différents groupe sous forme de boxplot. On va utiliser Chow comme groupe de référence et le comparer à tout les autres avec un test de wilcoxon pour voir desquels il diffère. Le nombre d’étoiles codent le niveau de significativité, les comparaisons non-significatives ne sont pas indiqués
## Select only some families
families <- c("Rikenellaceae", "Bacteroidaceae", "Muribaculaceae","Lachnospiraceae", "Ruminococcaceae", "Desulfovibrionaceae")
phy <- frogs.data %>% subset_taxa(Family %in% families) %>% tax_glom(taxrank="Family") #agglomerate at family level
## Transform count to relative abundances
depth <- sample_sums(frogs.data)[1]
plotdata<-psmelt(phy) %>%
mutate(Abundance = Abundance / depth,
# Family = factor(Family, levels = families), ## set family order to the one specified in `families` rather than the alphabetical one.
Group = factor(Group, labels = c("A", "B","C", "D"))) ## change treatment names to the paper ones
p <- ggplot(plotdata,aes(x = Group,y=Abundance, color = Group, Group = Group)) +
stat_boxplot(geom = "errorbar", width = 0.5) +
geom_boxplot(outlier.alpha = 1,
outlier.size = 0.8) +
facet_wrap(~Family, scales = "free_y", ncol = 2) +
#scale_color_brewer(palette = "Set3") +
# theme_classic() + ## fond blanc
labs(x = NULL) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 15))
## Compare reference level Chow to all other levels using a wilcoxon test and adjust p-values using the holm correction.
p3 <- p + stat_compare_means(aes(label = ..p.signif..), method = "wilcox.test", p.adjust.method = "holm", ref.group = "Chow", hide.ns = T,
label.y.npc = c(0.90),
size = 7,
fontface = "bold")
plot(p3)
Palette Viridis
## Select only some families
families <- c("Rikenellaceae", "Bacteroidaceae", "Muribaculaceae","Lachnospiraceae", "Ruminococcaceae", "Desulfovibrionaceae")
phy <- frogs.data %>% subset_taxa(Family %in% families) %>% tax_glom(taxrank="Family") #agglomerate at family level
## Transform count to relative abundances
depth <- sample_sums(frogs.data)[1]
plotdata<-psmelt(phy) %>%
mutate(Abundance = Abundance / depth,
# Family = factor(Family, levels = families), ## set family order to the one specified in `families` rather than the alphabetical one.
Group = factor(Group, labels = c("A", "B","C", "D"))) ## change treatment names to the paper ones
p <- ggplot(plotdata,aes(x = Group,y=Abundance, color = Group, Group = Group)) +
stat_boxplot(geom = "errorbar", width = 0.5) +
geom_boxplot(outlier.alpha = 1,
outlier.size = 0.8) +
facet_wrap(~Family, scales = "free_y", ncol = 2) +
### Palette colours viridis
scale_color_viridis(discrete = TRUE, option = "D")+
scale_fill_viridis(discrete = TRUE) +
theme_minimal() +
# theme_classic() + ## fond blanc
labs(x = NULL) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 15))
## Compare reference level Chow to all other levels using a wilcoxon test and adjust p-values using the holm correction.
p <- p + stat_compare_means(aes(label = ..p.signif..), method = "wilcox.test", p.adjust.method = "holm", ref.group = "Chow", hide.ns = T,
label.y.npc = c(0.90),
size = 7,
fontface = "bold")
plot(p)
frogs.data.merged<-merge_samples(frogs.data,group="Group")
## Restaurer les niveaux du facteur utilisé pour fusionner les échantillons
sample_data(frogs.data.merged)$Group <- sample_names(frogs.data.merged)
correct.order <- c("A", "B","C", "D")
sample_data(frogs.data.merged)$Group <- factor(sample_data(frogs.data.merged)$Group,
levels = correct.order)
#levels(get_variable(frogs.data.merged, "diet"))
my.pal <- c(RColorBrewer::brewer.pal(n = 12, name = "Paired"), "gray")
p <- plot_composition(frogs.data.merged, "Kingdom", "Bacteria", "Family",numberOfTaxa = 12, fill = "Family")
p <- p + facet_wrap(~Group, scales = "free_x", nrow = 1) +
scale_fill_manual(values = my.pal) +
scale_color_manual(values = my.pal) +
scale_y_continuous(label = scales::percent)
#theme_bw()
## scale_fill_brewer(palette="Paired") +
## scale_colour_brewer(palette="Paired") +
plot(p)
#ggsave(p, file = "C:/Users/mmonnoye/Documents/Family Composition.png", width = 6, height = 4)
Les 4 phylums d’intérêts sont les Bacteroidetes, les Firmicutes, les Proteobacteria et les Tenericutes.
On va zoomer au niveau Famille dans chacun de ces phylums.
correct.order <- c("A", "B","C", "D")
sample_data(frogs.data.merged)$Group <- factor(sample_data(frogs.data.merged)$Group,
levels = correct.order)
p <- plot_composition(frogs.data, "Phylum", "Bacteroidetes", "Family",numberOfTaxa = 5, fill = "Family")
p <- p + facet_wrap(~Group, scales = "free_x", nrow = 1)
#ggplotly(p)
plot(p)
frogs.data.merged<-merge_samples(frogs.data,group="Group")
## Restaurer les niveaux du facteur utilis? pour fusionner les ?chantillons
sample_data(frogs.data.merged)$Group <- sample_names(frogs.data.merged)
correct.order <- c("A", "B","C", "D")
sample_data(frogs.data.merged)$Group <- factor(sample_data(frogs.data.merged)$Group,
levels = correct.order)
p <- plot_composition(frogs.data.merged, "Phylum", "Bacteroidetes", "Family",numberOfTaxa = 5, fill = "Family")
p <- p + facet_wrap(~Group, scales = "free_x", nrow = 1)
p <- p + ggtitle("Bacteroidetes Composition TF") ## Changer le titre
plot(p)
#ggsave(p, file = "C:/Users/mmonnoye/Documents/Biom/Zahra/figure_example_4x4.png", width = 3, height = 4)
Autre echelle de couleur
frogs.data.merged<-merge_samples(frogs.data,group="Group")
## Restaurer les niveaux du facteur utilis? pour fusionner les ?chantillons
sample_data(frogs.data.merged)$Group <- sample_names(frogs.data.merged)
correct.order <- c("A", "B","C", "D")
sample_data(frogs.data.merged)$Group <- factor(sample_data(frogs.data.merged)$Group,
levels = correct.order)
p <- plot_composition(frogs.data.merged, "Phylum", "Bacteroidetes", "Family",numberOfTaxa = 5, fill = "Family")
p <- p + facet_wrap(~Group, scales = "free_x", nrow = 1)
p <- p + ggtitle("Bacteroidetes Composition ")+
scale_y_continuous(label = scales::percent) + scale_color_paletteer_d("pals::glasbey")+
scale_fill_paletteer_d("pals::glasbey")
plot(p)
frogs.data.merged<-merge_samples(frogs.data,group="Group")
## Restaurer les niveaux du facteur utilisé pour fusionner les échantillons
sample_data(frogs.data.merged)$Group <- sample_names(frogs.data.merged)
p <- plot_composition(frogs.data.merged, "Family", "Bacteroidaceae", "Genus",numberOfTaxa = 14, fill = "Genus")
p <- p + facet_wrap(~Group, scales = "free_x", nrow = 1)
p <- p + ggtitle("Bacteroidaceae Composition ")+
scale_y_continuous(label = scales::percent) + scale_color_paletteer_d("pals::glasbey")+
scale_fill_paletteer_d("pals::glasbey") ## Changer le titre
plot(p)
#ggsave(p, file = "C:/Users/mmonnoye/Documents/Biom/Zahra/figure_example_4x4.png", width = 3, height = 4)
#ggsave(p, file = "Lactobacillaceae Composition .png",width = 9, height = 7,dpi=300)
frogs.data.merged<-merge_samples(frogs.data,group="Group")
## Restaurer les niveaux du facteur utilisé pour fusionner les échantillons
sample_data(frogs.data.merged)$Group <- sample_names(frogs.data.merged)
p <- plot_composition(frogs.data.merged, "Genus", "Bacteroides", "Species",numberOfTaxa = 14, fill = "Species")
p <- p + facet_wrap(~Group, scales = "free_x", nrow = 1)
p <- p + ggtitle("Bacteroides Composition ")+
scale_y_continuous(label = scales::percent) + scale_color_paletteer_d("pals::glasbey")+
scale_fill_paletteer_d("pals::glasbey") ## Changer le titre
plot(p)
#ggsave(p, file = "C:/Users/mmonnoye/Documents/Biom/Zahra/figure_example_4x4.png", width = 3, height = 4)
#ggsave(p, file = "Ruminococcus 1 Composition .png",width = 8, height = 7,dpi=300)
correct.order <- c( "A", "B","C", "D")
sample_data(frogs.data.merged)$Group <- factor(sample_data(frogs.data.merged)$Group,
levels = correct.order)
p <- plot_composition(frogs.data, "Phylum", "Firmicutes", "Family",numberOfTaxa = 5, fill = "Family")
p <- p + facet_wrap(~Group, scales = "free_x", nrow = 1)
#ggplotly(p)
plot(p)
frogs.data.merged<-merge_samples(frogs.data,group="Group")
## Restaurer les niveaux du facteur utilis? pour fusionner les ?chantillons
sample_data(frogs.data.merged)$Group <- sample_names(frogs.data.merged)
correct.order <- c("A", "B","C", "D")
sample_data(frogs.data.merged)$Group <- factor(sample_data(frogs.data.merged)$Group,
levels = correct.order)
p <- plot_composition(frogs.data.merged, "Phylum", "Firmicutes", "Family",numberOfTaxa = 5, fill = "Family")
p <- p + facet_wrap(~Group, scales = "free_x", nrow = 1)
p <- p + ggtitle("Firmicutes Composition TF") ## Changer le titre
plot(p)
#ggsave(p, file = "C:/Users/mmonnoye/Documents/Biom/Zahra/figure_example_4x4.png", width = 3, height = 4)
correct.order <- c("A", "B","C", "D")
sample_data(frogs.data.merged)$Group <- factor(sample_data(frogs.data.merged)$Group,
levels = correct.order)
p <- plot_composition(frogs.data, "Phylum", "Proteobacteria", "Family",numberOfTaxa = 5, fill = "Family")
p <- p + facet_wrap(~Group, scales = "free_x", nrow = 1)
plot(p)
frogs.data.merged<-merge_samples(frogs.data,group="Group")
## Restaurer les niveaux du facteur utilis? pour fusionner les ?chantillons
sample_data(frogs.data.merged)$Group <- sample_names(frogs.data.merged)
correct.order <- c("A", "B","C", "D")
sample_data(frogs.data.merged)$Group <- factor(sample_data(frogs.data.merged)$Group,
levels = correct.order)
p <- plot_composition(frogs.data.merged, "Phylum", "Proteobacteria", "Family",numberOfTaxa = 5, fill = "Family")
p <- p + facet_wrap(~Group, scales = "free_x", nrow = 1)
p <- p + ggtitle("Proteobacteria Composition TF") ## Changer le titre
plot(p)
#ggsave(p, file = "C:/Users/mmonnoye/Documents/Biom/Zahra/figure_example_4x4.png", width = 3, height = 4)
Comme ces distances sont sensibles à la profondeur d’échantillonnage, on va raréfier les échantillons avant de calculer les distances. Normalisation par rarefaction: même nombre de séquences pour chaque échantillon.
frogs.data.rare<-rarefy_even_depth(frogs.data,rngseed=20170329)
sample_sums(frogs.data.rare)[1:5]
## TF_25 TF_26 TF_27 TF_31 TF_32
## 5692 5692 5692 5692 5692
Avant de calculer les diversités \(\alpha\), on va faire des courbes de raréfaction pour vérifier si on a saturé la richesse sous-dominante (i.e. celle qui passe les filtres d’abondances).
Infos Richesse: nombre d’OTUs par échantillon représenté par les courbes de raréfaction
Principe courbes de rarefaction : Compter le nombre d’OTU pour un ensemble de sous-échantillon à différents intervalles de profondeur (x= nombre de séquences et y= nombre d’OTU)
#Rarefaction curves Regime (for observed richness)
#correct.order <- c("Avant_Stress", "Milieu_Stress", "Fin_Stress")
#sample_data(frogs.data.merged)$Groupe <- factor(sample_data(frogs.data.merged)$Groupe,
#levels = correct.order)
family.palette<- c( "A" = "#09C51F",
"B" = "#8BE9F9",
"C" = "#0995C5",
"D" = "#094DC5")
p <- ggrare(frogs.data, step = 100, color = "Group", plot = FALSE)
## rarefying sample TF_25
## rarefying sample TF_26
## rarefying sample TF_27
## rarefying sample TF_31
## rarefying sample TF_32
## rarefying sample TF_33
## rarefying sample TF_37
## rarefying sample TF_38
## rarefying sample TF_39
## rarefying sample TF_43
## rarefying sample TF_44
## rarefying sample TF_45
## rarefying sample TF_49
## rarefying sample TF_50
## rarefying sample TF_51
## rarefying sample TF_55
## rarefying sample TF_56
## rarefying sample TF_57
## rarefying sample TF_61
## rarefying sample TF_62
## rarefying sample TF_63
## rarefying sample TF_67
## rarefying sample TF_68
## rarefying sample TF_69
rare.level <- min(sample_sums(frogs.data))
p <- p + facet_wrap(~Group) + geom_vline(xintercept = rare.level, color = "Gray60")+
xlab("Sample Size (nb de séquences)") + ylab("Species Richness (nb d'OTU)")+ scale_color_manual(values = family.palette)+ scale_fill_manual(breaks = c( "A","B", "C", "D"),
values=c("#09C51F","#8BE9F9", "#0995C5", "#094DC5"))
#ggplotly(p)
plot(p)
#correct.order <- c("Avant_Stress", "Milieu_Stress", "Fin_Stress")
#sample_data(frogs.data.rare)$Groupe <- factor(sample_data(frogs.data.rare)$Groupe,
#levels = correct.order)
# family.palette<- c(
# "A" = "#09C51F",
# "B" = "#8BE9F9",
# "C" = "#0995C5",
# "D" = "#094DC5")
p <- plot_richness(frogs.data.rare, color = "Group",measures = c("Observed", "Chao1", "ACE", "Shannon", "Simpson", "InvSimpson", "Fisher"),x = "Group", title = "Alpha diversity Comparaison microbiote") +theme_bw() + geom_boxplot(aes(fill = Group)) + geom_point() + theme(axis.text.x = element_blank())
# + scale_color_manual(values = family.palette)+ scale_fill_manual(breaks = c("A","B", "C", "D"),
# values=c("#09C51F","#8BE9F9", "#0995C5", "#094DC5"))
#ggplotly(p)
plot(p)
richness.table <- estimate_richness(frogs.data.rare, measures = c("Observed", "Chao1", "ACE", "Shannon", "Simpson", "InvSimpson", "Fisher"))
richness.table <- cbind(richness.table, sample_data(frogs.data.rare))
richness.table$Depth <- sample_sums(frogs.data.rare)
#knitr::kable(richness.table)
richness.table
## Observed Chao1 se.chao1 ACE se.ACE Shannon Simpson
## TF_25 125 138.9091 8.684721 137.1734 5.781133 3.235785 0.8865241
## TF_26 110 122.0000 7.960353 119.8046 5.359846 2.986277 0.8953327
## TF_27 98 125.1429 16.502556 113.9737 5.308976 2.931824 0.8910912
## TF_31 110 127.1000 10.410991 124.0407 5.585664 3.183385 0.9126987
## TF_32 145 174.5455 15.534677 163.3709 6.319929 3.555453 0.9399338
## TF_33 123 131.5714 5.748565 132.6944 5.652564 3.460261 0.9358264
## TF_37 155 171.8667 9.289624 167.9237 6.271825 3.572851 0.9429965
## TF_38 153 162.0667 5.894332 160.7415 6.148528 3.746127 0.9586345
## TF_39 157 173.8667 9.289637 171.0081 6.445127 3.790271 0.9593817
## TF_43 149 162.6364 7.259506 164.8707 6.189332 3.440639 0.9347040
## TF_44 136 155.4615 10.733898 150.3924 5.942000 3.308821 0.9199082
## TF_45 131 137.5000 4.881065 136.8566 5.692891 3.347422 0.9281925
## TF_49 150 155.2174 3.689699 158.3837 6.135304 3.626107 0.9521367
## TF_50 151 167.8667 9.289597 163.6842 6.180999 3.582712 0.9484626
## TF_51 145 152.7727 4.881132 156.5957 6.153296 3.546282 0.9448029
## TF_55 132 141.2308 6.165558 140.2350 5.772755 3.328202 0.9211704
## TF_56 126 139.1250 7.594515 138.5742 5.618986 3.030536 0.9155367
## TF_57 130 135.2174 3.689656 139.6014 5.802700 3.108889 0.9085717
## TF_61 109 130.1111 12.606579 123.3349 5.526767 3.061096 0.9086255
## TF_62 128 139.7692 7.362383 138.5076 5.706652 3.566590 0.9506657
## TF_63 124 131.0000 4.904939 132.6817 5.651270 3.475671 0.9442988
## TF_67 147 172.0000 13.300239 164.0753 6.343085 3.598094 0.9506249
## TF_68 136 147.4000 6.955166 146.1642 5.833108 3.309084 0.9185663
## TF_69 127 139.1579 6.905637 144.7155 6.039969 3.059149 0.8764162
## InvSimpson Fisher Name Reads Group Time_point Groupe Depth
## TF_25 8.812444 22.59075 TF_25 34511 A TF TF_A 5692
## TF_26 9.554081 19.33860 TF_26 36503 A TF TF_A 5692
## TF_27 9.181993 16.81721 TF_27 42363 A TF TF_A 5692
## TF_31 11.454579 19.33860 TF_31 33042 A TF TF_A 5692
## TF_32 16.648304 27.09080 TF_32 42068 A TF TF_A 5692
## TF_33 15.582730 22.15089 TF_33 33480 A TF TF_A 5692
## TF_37 17.542789 29.40780 TF_37 40689 B TF TF_B 5692
## TF_38 24.174757 28.94092 TF_38 28018 B TF TF_B 5692
## TF_39 24.619459 29.87640 TF_39 37592 B TF TF_B 5692
## TF_43 15.314875 28.01236 TF_43 32693 B TF TF_B 5692
## TF_44 12.485679 25.04326 TF_44 35838 B TF TF_B 5692
## TF_45 13.926130 23.92156 TF_45 30030 B TF TF_B 5692
## TF_49 20.892815 28.24385 TF_49 26887 C TF TF_C 5692
## TF_50 19.403376 28.47577 TF_50 30963 C TF TF_C 5692
## TF_51 18.116887 27.09080 TF_51 33423 C TF TF_C 5692
## TF_55 12.685588 24.14499 TF_55 32956 C TF TF_C 5692
## TF_56 11.839457 22.81139 TF_56 34997 C TF TF_C 5692
## TF_57 10.937536 23.69860 TF_57 29318 C TF TF_C 5692
## TF_61 10.943972 19.12570 TF_61 36616 D TF TF_D 5692
## TF_62 20.269889 23.25406 TF_62 32423 D TF TF_D 5692
## TF_63 17.952931 22.37059 TF_63 29127 D TF TF_D 5692
## TF_67 20.253138 27.55070 TF_67 30057 D TF TF_D 5692
## TF_68 12.279925 25.04326 TF_68 32306 D TF TF_D 5692
## TF_69 8.091673 23.03249 TF_69 34686 D TF TF_D 5692
#write.xlsx(richness.table,"Richness table T11.xlsx",row.names=TRUE,col.names = TRUE, sep="\t",dec=",",na=" ")
#write.table(richness.table,"Richness_table_Rouen.csv",row.names=FALSE, sep="\t",dec=",",na=" ")
TABLEAU CI DESSUS AU FORMAT EXCEL DANS LE FICHIER “TABLEAU DE VALEURS” JOINT AVEC CE DOCUMENT
Analyse de la variance, compare les moyennes d’échantillons.
div_data <- cbind(estimate_richness(frogs.data.rare, measures = "Observed"),
sample_data(frogs.data.rare))
model <- aov(Observed ~ 0 + Group, data = div_data)
anova(model)
## Analysis of Variance Table
##
## Response: Observed
## Df Sum Sq Mean Sq F value Pr(>F)
## Group 4 428613 107153 647.09 < 2.2e-16 ***
## Residuals 20 3312 166
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## impact of Groupe and Time_point on richness after correction for depth / seulement possible si T0 et Tf
#observed.aov <- aov(Observed ~ Depth + Groupe + Time_point, data = richness.table)
#anova(observed.aov)
Df: nombre de degrés de liberté Sum Sq: inertie inter et intra respectivement Mean Sq: c’est la variance, obtenue en faisant le quotient de la 2eme colonne par la 1ere (moyenne des inerties) F value: obtenue en faisant quotient des 2 valeurs trouvées dans la 3eme colonne Pr(>F): pvalue
coef function: permet d’extraire que les coefficients estimés, mesure l’écart des groupes à la moyenne. Groupes équilibrés si les valeurs sont du même ordre de grandeur.
coef(model)
## GroupA GroupB GroupC GroupD
## 118.5000 146.8333 139.0000 128.5000
Utilisé pour déterminer les différences significatives entre les moyennes de groupes dans une analyse de variance. Pour étudier les différences inter-groupes, permet de distinguer parmi les éechantillons s’il y en a qui différent significativement des autres.
TukeyHSD(model)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Observed ~ 0 + Group, data = div_data)
##
## $Group
## diff lwr upr p adj
## B-A 28.333333 7.5386839 49.127983 0.0055144
## C-A 20.500000 -0.2946494 41.294649 0.0542051
## D-A 10.000000 -10.7946494 30.794649 0.5458805
## C-B -7.833333 -28.6279827 12.961316 0.7202204
## D-B -18.333333 -39.1279827 2.461316 0.0962765
## D-C -10.500000 -31.2946494 10.294649 0.5059833
diff: différences entre les moyennes obséervées lwr et upr: donnent les bornes inférieure et supérieur de l’intervalle p adj: pvalue après ajustement pour les comparaisons multiples
comparison_data <- compare_means(
Observed ~ Group,
data = div_data,
method = "wilcox.test"
) %>% filter(p.adj <= 0.05)
comparison_data
## # A tibble: 0 x 8
## # … with 8 variables: .y. <chr>, group1 <chr>, group2 <chr>, p <dbl>,
## # p.adj <dbl>, p.format <chr>, p.signif <chr>, method <chr>
# comparison_data <- compare_means(
# Observed ~ Group,
# data = div_data,
# method = "wilcox.test"
# ) %>% filter(p.adj <= 0.05) %>%
# mutate(p.signif = symnum(
# p.adj,
# cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, 1),
# symbols = c("****", "***", "**", "*", "ns"))) %>%
# mutate(y = c(185, 140, 160, 170, 165),
# x = c(1.5, 2, 3.5, 3, 3.5))
#
# ggplot(div_data, aes(x = Group, y = Observed)) +
# geom_point() +
# geom_boxplot(aes(fill = Group)) +
# geom_segment(data = comparison_data,
# aes(x = group1, xend = group2, y = y, yend = y)) +
# geom_text(data = comparison_data,
# aes(x = x, y = y, label = p.signif), nudge_y =1 ) #nudge_y=1 retrait etoile par rapport a la barre
div_data <- cbind(estimate_richness(frogs.data.rare, measures = "Chao1"),
sample_data(frogs.data.rare))
model <- aov(Chao1 ~ 0 + Group, data = div_data)
anova(model)
## Analysis of Variance Table
##
## Response: Chao1
## Df Sum Sq Mean Sq F value Pr(>F)
## Group 4 522103 130526 552.46 < 2.2e-16 ***
## Residuals 20 4725 236
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coef(model)
## GroupA GroupB GroupC GroupD
## 136.5448 160.5663 148.5717 143.2397
TukeyHSD(model)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Chao1 ~ 0 + Group, data = div_data)
##
## $Group
## diff lwr upr p adj
## B-A 24.021512 -0.8173599 48.860384 0.0602594
## C-A 12.026852 -12.8120193 36.865724 0.5403591
## D-A 6.694901 -18.1439708 31.533773 0.8737170
## C-B -11.994659 -36.8335311 12.844212 0.5425251
## D-B -17.326611 -42.1654827 7.512261 0.2388774
## D-C -5.331952 -30.1708233 19.506920 0.9306014
comparison_data <- compare_means(
Chao1 ~ Group,
data = div_data,
method = "wilcox.test"
) %>% filter(p.adj <= 0.05)
comparison_data
## # A tibble: 0 x 8
## # … with 8 variables: .y. <chr>, group1 <chr>, group2 <chr>, p <dbl>,
## # p.adj <dbl>, p.format <chr>, p.signif <chr>, method <chr>
# comparison_data <- compare_means(
# Chao1 ~ Group,
# data = div_data,
# method = "wilcox.test"
# ) %>% filter(p.adj <= 0.05) %>%
# mutate(p.signif = symnum(
# p.adj,
# cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, 1),
# symbols = c("****", "***", "**", "*", "ns"))) %>%
# mutate(y = c(200, 160, 180),
# x = c(1.5, 2, 3.5))
#
# ggplot(div_data, aes(x = Group, y = Chao1)) +
# geom_point() +
# geom_boxplot(aes(fill = Group)) +
# geom_segment(data = comparison_data,
# aes(x = group1, xend = group2, y = y, yend = y)) +
# geom_text(data = comparison_data,
# aes(x = x, y = y, label = p.signif), nudge_y =1 ) #nudge_y=1 retrait etoile par rapport a la barre
#####Anova sur la Richness ACE
div_data <- cbind(estimate_richness(frogs.data.rare, measures = "ACE"),
sample_data(frogs.data.rare))
model <- aov(ACE ~ 0 + Group, data = div_data)
anova(model)
## Analysis of Variance Table
##
## Response: ACE
## Df Sum Sq Mean Sq F value Pr(>F)
## Group 4 509673 127418 642.68 < 2.2e-16 ***
## Residuals 20 3965 198
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coef(model)
## GroupA GroupB GroupC GroupD
## 131.8430 158.6322 149.5124 141.5799
TukeyHSD(model)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = ACE ~ 0 + Group, data = div_data)
##
## $Group
## diff lwr upr p adj
## B-A 26.789200 4.035502 49.542897 0.0174674
## C-A 17.669409 -5.084288 40.423107 0.1648824
## D-A 9.736900 -13.016797 32.490598 0.6352594
## C-B -9.119790 -31.873488 13.633907 0.6807042
## D-B -17.052299 -39.805997 5.701398 0.1879308
## D-C -7.932509 -30.686207 14.821188 0.7645540
comparison_data <- compare_means(
ACE ~ Group,
data = div_data,
method = "wilcox.test"
) %>% filter(p.adj <= 0.05)
comparison_data
## # A tibble: 0 x 8
## # … with 8 variables: .y. <chr>, group1 <chr>, group2 <chr>, p <dbl>,
## # p.adj <dbl>, p.format <chr>, p.signif <chr>, method <chr>
# comparison_data <- compare_means(
# ACE ~ Group,
# data = div_data,
# method = "wilcox.test"
# ) %>% filter(p.adj <= 0.05) %>%
# mutate(p.signif = symnum(
# p.adj,
# cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, 1),
# symbols = c("****", "***", "**", "*", "ns"))) %>%
# mutate(y = c(200, 160, 180),
# x = c(1.5, 2, 3.5))
#
# ggplot(div_data, aes(x = Group, y = ACE)) +
# geom_point() +
# geom_boxplot(aes(fill = Group)) +
# geom_segment(data = comparison_data,
# aes(x = group1, xend = group2, y = y, yend = y)) +
# geom_text(data = comparison_data,
# aes(x = x, y = y, label = p.signif), nudge_y =1 ) #nudge_y=1 retrait etoile par rapport a la barre
div_data <- cbind(estimate_richness(frogs.data.rare, measures = "Shannon"),
sample_data(frogs.data.rare))
model <- aov(Shannon ~ 0 + Group, data = div_data)
anova(model)
## Analysis of Variance Table
##
## Response: Shannon
## Df Sum Sq Mean Sq F value Pr(>F)
## Group 4 272.665 68.166 1199.4 < 2.2e-16 ***
## Residuals 20 1.137 0.057
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coef(model)
## GroupA GroupB GroupC GroupD
## 3.225497 3.534355 3.370455 3.344948
TukeyHSD(model)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Shannon ~ 0 + Group, data = div_data)
##
## $Group
## diff lwr upr p adj
## B-A 0.30885758 -0.0763832 0.6940984 0.1455726
## C-A 0.14495720 -0.2402836 0.5301980 0.7209027
## D-A 0.11945026 -0.2657905 0.5046910 0.8211981
## C-B -0.16390038 -0.5491412 0.2213404 0.6394364
## D-B -0.18940732 -0.5746481 0.1958335 0.5279221
## D-C -0.02550695 -0.4107477 0.3597338 0.9976643
comparison_data <- compare_means(
Shannon ~ Group,
data = div_data,
method = "wilcox.test"
) %>% filter(p.adj <= 0.05)
comparison_data
## # A tibble: 0 x 8
## # … with 8 variables: .y. <chr>, group1 <chr>, group2 <chr>, p <dbl>,
## # p.adj <dbl>, p.format <chr>, p.signif <chr>, method <chr>
# comparison_data <- compare_means(
# Shannon ~ Group,
# data = div_data,
# method = "wilcox.test"
# ) %>% filter(p.adj <= 0.05) %>%
# mutate(p.signif = symnum(
# p.adj,
# cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, 1),
# symbols = c("****", "***", "**", "*", "ns"))) %>%
# mutate(y = c(4, 4.5, 4.3,4.2),
# x = c(1.5, 2, 3.5,4))
#
# ggplot(div_data, aes(x = Group, y = Shannon)) +
# geom_point() +
# geom_boxplot(aes(fill = Group)) +
# geom_segment(data = comparison_data,
# aes(x = group1, xend = group2, y = y, yend = y)) +
# geom_text(data = comparison_data,
# aes(x = x, y = y, label = p.signif), nudge_y =1 ) #nudge_y=1 retrait etoile par rapport a la barre
div_data <- cbind(estimate_richness(frogs.data.rare, measures = "Simpson"),
sample_data(frogs.data.rare))
model <- aov(Simpson ~ 0 + Group, data = div_data)
anova(model)
## Analysis of Variance Table
##
## Response: Simpson
## Df Sum Sq Mean Sq F value Pr(>F)
## Group 4 20.6215 5.1554 10179 < 2.2e-16 ***
## Residuals 20 0.0101 0.0005
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coef(model)
## GroupA GroupB GroupC GroupD
## 0.9102345 0.9406363 0.9317801 0.9248662
TukeyHSD(model)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Simpson ~ 0 + Group, data = div_data)
##
## $Group
## diff lwr upr p adj
## B-A 0.030401786 -0.005965199 0.06676877 0.1223289
## C-A 0.021545673 -0.014821312 0.05791266 0.3706634
## D-A 0.014631758 -0.021735227 0.05099874 0.6781601
## C-B -0.008856113 -0.045223098 0.02751087 0.9028683
## D-B -0.015770028 -0.052137013 0.02059696 0.6256180
## D-C -0.006913915 -0.043280900 0.02945307 0.9502164
div_data <- cbind(estimate_richness(frogs.data.rare, measures = "InvSimpson"),
sample_data(frogs.data.rare))
model <- aov(InvSimpson ~ 0 + Group, data = div_data)
anova(model)
## Analysis of Variance Table
##
## Response: InvSimpson
## Df Sum Sq Mean Sq F value Pr(>F)
## Group 4 5604.5 1401.1 66.103 3.045e-11 ***
## Residuals 20 423.9 21.2
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coef(model)
## GroupA GroupB GroupC GroupD
## 11.87236 18.01061 15.64594 14.96525
TukeyHSD(model)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = InvSimpson ~ 0 + Group, data = div_data)
##
## $Group
## diff lwr upr p adj
## B-A 6.1382598 -1.301546 13.578065 0.1293742
## C-A 3.7735880 -3.666218 11.213394 0.5022458
## D-A 3.0928996 -4.346906 10.532705 0.6557963
## C-B -2.3646719 -9.804477 5.075134 0.8102475
## D-B -3.0453602 -10.485166 4.394445 0.6664972
## D-C -0.6806883 -8.120494 6.759117 0.9939189
div_data <- cbind(estimate_richness(frogs.data.rare, measures = "Fisher"),
sample_data(frogs.data.rare))
model <- aov(Fisher ~ 0 + Group, data = div_data)
anova(model)
## Analysis of Variance Table
##
## Response: Fisher
## Df Sum Sq Mean Sq F value Pr(>F)
## Group 4 14511.5 3627.9 441.31 < 2.2e-16 ***
## Residuals 20 164.4 8.2
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coef(model)
## GroupA GroupB GroupC GroupD
## 21.22114 27.53372 25.74423 23.39613
TukeyHSD(model)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Fisher ~ 0 + Group, data = div_data)
##
## $Group
## diff lwr upr p adj
## B-A 6.312576 1.6792872 10.9458651 0.0055175
## C-A 4.523089 -0.1101996 9.1563784 0.0572392
## D-A 2.174992 -2.4582975 6.8082805 0.5651326
## C-B -1.789487 -6.4227757 2.8438022 0.7047377
## D-B -4.137585 -8.7708736 0.4957043 0.0905876
## D-C -2.348098 -6.9813868 2.2851911 0.5029462
Diversité entre les communautés, comparaison des échantillons entre eux / les communautés entre elles.
Bray curtis nous permet de visualiser la composition, l’abondance, la dominance, etc., de populations ou de communautés dans l’écosystème et peut être utilisé pour obtenir un résultat du degré de dissemblance entre les espèces. L’indice de Bray-Curtis donne le même poids aux différences d’abondances observées pour les espèces rares que pour les espèces importantes. Si =1 cela signifie que les communautés sont totalement différentes.
Cet indice va de 0 (les deux échantillons partagent les mêmes OTUs) à 1 (les deux échantillons n’ont aucun OTU en commun). Exemple: if 30% of taxa are in both samples, this means 70% are only found in one sample, and the Jaccard distance is 0.7
Unifrac (phylogénie) prend en compte arbre phylogénique et l’abondance : la distance UniFrac entre deux échantillons se base sur les longueurs des branches de l’arbre partagées entre ces deux échantillons. Unifrac = Nb de branches contenant des communautés identiques / Nb total de branches. Si unifrac= 0 Branches partagées Si unfrac= 1 Pas de branches partagées
Weighted UniFrac (pondéré) intègre les abondances lors du calcul des longueurs de branches partagées / non partagées pour calculer la distance, de sorte que l’impact des caractéristiques de faible abondance est diminué.
UniFrac pondéré est utile pour examiner les différences dans la structure de la communauté; UniFrac non pondéré est plus sensible aux différences de caractéristiques de faible abondance. Les deux sont donc utiles à interpréter ensemble et il y a des cas où les deux, un seul ou les deux peuvent produire des différences significatives entre les groupes expérimentaux.
SampleOrder <- levels(reorder(sample_names(frogs.data.rare), as.numeric(get_variable(frogs.data.rare, "Group"))))
dist.bc <- distance(frogs.data.rare, "bray")
plot_dist_as_heatmap(dist.bc, order = SampleOrder,title="Bray Distances", show.names = T)
SampleOrder <- levels(reorder(sample_names(frogs.data.rare), as.numeric(get_variable(frogs.data.rare, "Group"))))
dist.jac <- distance(frogs.data.rare, "cc")
plot_dist_as_heatmap(dist.jac, order = SampleOrder,title="Jaccard Distances", show.names = T)
SampleOrder <- levels(reorder(sample_names(frogs.data.rare), as.numeric(get_variable(frogs.data.rare, "Group"))))
dist.uf <- distance(frogs.data.rare, "unifrac")
plot_dist_as_heatmap(dist.uf, order = SampleOrder,title="Unifrac Distances", show.names = T)
SampleOrder <- levels(reorder(sample_names(frogs.data.rare), as.numeric(get_variable(frogs.data.rare, "Group"))))
dist.wuf <- distance(frogs.data.rare, "wunifrac")
plot_dist_as_heatmap(dist.wuf, order = SampleOrder,title="Weighted Unifrac Distances", show.names = T)
On va faire une MDS (non-parametric multi-dimensional scaling) aussi appelé PCoA pour avoir une représentation en 2D de la distance entre tous les échantillons. Permet de visusaliser les similitudes entre les groupes.
La mise à l’échelle multidimensionnelle (MDS) est une approche populaire pour représenter graphiquement les relations entre des objets (par exemple, des tracés ou des échantillons) dans un espace multidimensionnel.
ord <- ordinate(frogs.data.rare, method = "MDS", distance = "bray")
family.palette<- c(
"A" = "#09C51F",
"B" = "#8BE9F9",
"C" = "#0995C5",
"D" = "#094DC5")
#Si on veut non des groupes sur ellipses:
#p <- plot_samples(frogs.data.rare, ord, color = "Groupe",shape="Groupe")
p <- plot_ordination(frogs.data.rare, ord, color = "Group",shape="Group")
p <- p + theme_bw() + ggtitle("MDS + Bray Curtis")+ scale_color_manual(values = family.palette)
p <- p + stat_ellipse(aes(group = Groupe))+geom_point(size=2)
#ggplotly(p)
plot(p)
Legende dans le graph et pvalue affichée
ord <- ordinate(frogs.data.rare, method = "MDS", distance = "bray")
family.palette<- c(
"A" = "#09C51F",
"B" = "#8BE9F9",
"C" = "#0995C5",
"D" = "#094DC5")
p <- plot_ordination(frogs.data.rare, ord, color = "Group",shape="Group")
p <- p + theme_bw() + ggtitle("MDS + Bray Curtis")+ scale_color_manual(values = family.palette)
p2 <- p + stat_ellipse(aes(group = Group),size=1) + #taille ellipses
geom_point(size=2.5)+ #taille points
theme(legend.position = c(0.6,0.25)) + #place legende
theme( legend.title = element_text(color = "black", size = 10),#couleur et taille legende
legend.text = element_text( size = 10)) +
#Ajouter pval sur graph
annotate("text", x = -0.35, y = -0.6, label = "p = 0.001 ***")
plot(p2)
#p2 <- p2 + theme(legend.position = c(0.05, 0.02), legend.justification = c(0, 0))
#- legend.position g?re la position dans le cadran: (0, 0) en bas ?
#gauche, (0.5, 0.5) au milieu, (0, 1) en haut ? gauche, (1, 1) en haut ? droite, etc
#- legend.justification g?re l'alignement par rapport ? la position:
#(0.5, 0.5) pour centrer la l?gende autour de legend.position, (0, 0)
#pour aligner son coin inf?rieur gauche avec legend.position, (0, 1) pour aligner son coin sup?rieur gauche, etc.
#ggsave(p, file = "/Users/NIPPONNOYE/Documents/2020 Boulot Mag/2020 Télétravail/Teletravail_20201001/Bray.eps", width = 10, height = 7)
Combo avec 3 graphiques pour publi
#install.packages("gridExtra")
library("gridExtra")
#install.packages("cowplot")
library("cowplot")
pcombo<-ggdraw() +
draw_plot(p1, 0, .5, 1, .5) +
draw_plot(p2, 0, 0, .5, .5) +
draw_plot(p3, .5, 0, .5, .5) +
draw_plot_label(c("A", "B", "C"), c(0, 0, 0.5), c(1, 0.5, 0.5), size = 20)+panel_border()
plot(pcombo)
#ggsave(p, file = "C:/Users/mmonnoye/Documents/figure_publi_10x10.tiff", width = 10, height = 10)
#ggsave(p, file = "C:/Users/mmonnoye/Documents/figure_publi_10x10.png", width = 10, height = 10)
Nom échantillon sur points
ord <- ordinate(frogs.data.rare, method = "MDS", distance = "bray")
family.palette<- c(
"A" = "#09C51F",
"B" = "#8BE9F9",
"C" = "#0995C5",
"D" = "#094DC5")
p <- plot_ordination(frogs.data.rare, ord, color = "Group",label="Name",shape="Group")
p <- p + theme_bw() + ggtitle("MDS + Bray Curtis")+ geom_point(size = 2)+ scale_color_manual(values = family.palette)
#ggplotly(p)
plot(p)
#script pour ajuster les noms sur les points
#p = plot_ordination(run_invert, SMP.ord.invert, color = "Site", shape = "Species", label = "Run_no")
# p + theme_bw() + geom_text(mapping = aes(label = Run_no), size = 10, vjust = 1.5) theme(text = element_text(size = 16)) + geom_point(size = 4)
#ou
#p = plot_ordination(run_invert, SMP.ord.invert, color = "Site", shape = "Species", label = "Run_no")
# + geom_text(aes(label = Run_no), size = 10, vjust = 1.5)+ geom_point(size = 4)+theme_bw()
Une facette wrap par groupe
ord <- ordinate(frogs.data.rare, method = "MDS", distance = "cc")
family.palette<- c("Chow" = "#FA6466",
"HFPalm" = "#D9AE0F",
"A" = "#09C51F",
"B" = "#8BE9F9",
"C" = "#0995C5",
"D" = "#094DC5")
p <- plot_ordination(frogs.data.rare, ord, color = "Group",shape="Group")
p <- p + theme_bw() + ggtitle("MDS + Jaccard ")
p <- p + stat_ellipse(aes(group = Group))+ geom_point(size = 2)+ scale_color_manual(values = family.palette)+ facet_wrap(~Group, ncol = 6)
#ggplotly(p)
plot(p)
Couleurs de base
ord <- ordinate(frogs.data.rare, method = "MDS", distance = "cc")
p <- plot_ordination(frogs.data.rare, ord, color = "Group",label="Name",shape="Group")
p <- p + theme_bw() + ggtitle("MDS + Jaccard ")+ geom_point(size = 2)
#ggplotly(p)
plot(p)
ord <- ordinate(frogs.data.rare, method = "MDS", distance = "unifrac")
family.palette<- c(
"A" = "#09C51F",
"B" = "#8BE9F9",
"C" = "#0995C5",
"D" = "#094DC5")
p <- plot_ordination(frogs.data.rare, ord, color = "Group",shape="Group")
p <- p + theme_bw() + ggtitle("MDS + Unifrac")
p <- p + stat_ellipse(aes(group = Group))+ geom_point(size = 2)+ scale_color_manual(values = family.palette)
#ggplotly(p)
plot(p)
ord <- ordinate(frogs.data.rare, method = "MDS", distance = "unifrac")
family.palette<- c(
"A" = "#09C51F",
"B" = "#8BE9F9",
"C" = "#0995C5",
"D" = "#094DC5")
p <- plot_ordination(frogs.data.rare, ord, color = "Group",label="Name",shape="Group")
p <- p + theme_bw() + ggtitle("MDS + Unifrac")+ geom_point(size = 2)+ scale_color_manual(values = family.palette)
#ggplotly(p)
plot(p)
ord <- ordinate(frogs.data.rare, method = "MDS", distance = "wUnifrac")
family.palette<- c(
"A" = "#09C51F",
"B" = "#8BE9F9",
"C" = "#0995C5",
"D" = "#094DC5")
p <- plot_ordination(frogs.data.rare, ord, color = "Group",shape="Group")
p <- p + theme_bw() + ggtitle("MDS + wUnifrac")
p <- p + stat_ellipse(aes(group = Group))+ geom_point(size = 2)+ scale_color_manual(values = family.palette)
#ggplotly(p)
plot(p)
ord <- ordinate(frogs.data.rare, method = "MDS", distance = "wUnifrac")
family.palette<- c(
"A" = "#09C51F",
"B" = "#8BE9F9",
"C" = "#0995C5",
"D" = "#094DC5")
p <- plot_ordination(frogs.data.rare, ord, color = "Group",label="Name",shape="Group")
p <- p + theme_bw() + ggtitle("MDS + wUnifrac")+ geom_point(size = 2)+ scale_color_manual(values = family.palette)
#ggplotly(p)
plot(p)
Pour confirmer l’effet du régime sur les distances, on va faire une permanova. Analyse multivariée de la variance par permutations basée sur les matrices de distances.
adonis(dist.bc ~ Group,data = as(sample_data(frogs.data.rare), 'data.frame'))
##
## Call:
## adonis(formula = dist.bc ~ Group, data = as(sample_data(frogs.data.rare), "data.frame"))
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Group 3 0.45015 0.150049 1.7973 0.21235 0.01 **
## Residuals 20 1.66972 0.083486 0.78765
## Total 23 2.11986 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Df: degree of freedom. Sums Of Sqs: sum of squares. MeanSqs: sum of squares by degree of freedom. F: F statistics. R2: partial R-squared. Pr(>F) p-value based
adonis(dist.jac ~ Group,data = as(sample_data(frogs.data.rare), 'data.frame'))
##
## Call:
## adonis(formula = dist.jac ~ Group, data = as(sample_data(frogs.data.rare), "data.frame"))
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Group 3 0.43359 0.144530 1.9459 0.22594 0.001 ***
## Residuals 20 1.48547 0.074274 0.77406
## Total 23 1.91906 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis(dist.uf ~ Group,data = as(sample_data(frogs.data.rare), 'data.frame'))
##
## Call:
## adonis(formula = dist.uf ~ Group, data = as(sample_data(frogs.data.rare), "data.frame"))
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Group 3 0.17031 0.056771 1.7581 0.20868 0.011 *
## Residuals 20 0.64584 0.032292 0.79132
## Total 23 0.81615 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis(dist.wuf ~ Group,data = as(sample_data(frogs.data.rare), 'data.frame'))
##
## Call:
## adonis(formula = dist.wuf ~ Group, data = as(sample_data(frogs.data.rare), "data.frame"))
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Group 3 0.17102 0.057008 2.353 0.26087 0.009 **
## Residuals 20 0.48456 0.024228 0.73913
## Total 23 0.65558 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
On peut aussi faire un clustering des échantillons pour vérifier s’ils se regroupent par groupe (ou autre).
Ward : c’est la plus courante. Elle consiste à réunir les deux clusters dont le regroupement fera le moins baisser l’inertie interclasse. C’est la distance de Ward qui est utilisée : la distance entre deux classes est celle de leurs barycentres au carré, pondérée par les effectifs des deux clusters. Cette technique tend à regrouper les petites classes entre elles.
Single : méthode du saut minimum prend en compte la plus petite distance entre A et B.
Complete : méthode du diamètre qui prend en compte la plus grande distance entre A et B.
Median : moyenne de tous les liens, qu’ils soient entre observations de deux clusters différents ou intraclasses. Cette méthode est la seule qui s’attache directement au cluster obtenu et non aux caractéristiques des clusters candidats.
Average : le logiciel mesure tous les liens entre chaque observation du cluster A et chaque observation du cluster B et en fait une moyenne. C’est une des méthodes les plus efficaces. Elle tend à réunir des clusters aux inerties faibles.
#family.palette<- c("Chow" = "#FA6466",
# "HFPalm" = "#D9AE0F",
# "A" = "#09C51F",
# "C" = "#0995C5",
# "D" = "#094DC5")
#plot_clust(frogs.data.rare, dist = dist.wuf, method = "ward.D2", color ="Group", palette = family.palette)
#plot_clust(frogs.data.rare, dist = dist.bc, method = "ward.D", color = "Group", palette = family.palette)
#plot_clust(frogs.data.rare, dist = dist.bc, method = "single", color = "Group", palette = family.palette)
#plot_clust(frogs.data.rare, dist = dist.bc, method = "complete", color = "Group", palette = family.palette)
#plot_clust(frogs.data.rare, dist = dist.bc, method = "median", color = "Group", palette = family.palette)
#plot_clust(frogs.data.rare, dist = dist.bc, method = "average", color = "Group", palette = family.palette)
plot_clust(frogs.data.rare, dist = dist.wuf, method = "ward.D2", color ="Group")
plot_clust(frogs.data.rare, dist = dist.bc, method = "ward.D", color = "Group")
plot_clust(frogs.data.rare, dist = dist.bc, method = "single", color = "Group")
plot_clust(frogs.data.rare, dist = dist.bc, method = "complete", color = "Group")
plot_clust(frogs.data.rare, dist = dist.bc, method = "median", color = "Group")
plot_clust(frogs.data.rare, dist = dist.bc, method = "average", color = "Group")
plot_clust(frogs.data.rare, dist = dist.jac, method = "ward.D2", color = "Group", palette = family.palette)
plot_clust(frogs.data.rare, dist = dist.jac, method = "ward.D", color = "Group", palette = family.palette)
plot_clust(frogs.data.rare, dist = dist.jac, method = "single", color = "Group", palette = family.palette)
plot_clust(frogs.data.rare, dist = dist.jac, method = "complete", color = "Group", palette = family.palette)
plot_clust(frogs.data.rare, dist = dist.jac, method = "median", color = "Group", palette = family.palette)
plot_clust(frogs.data.rare, dist = dist.jac, method = "average", color = "Group", palette = family.palette)
plot_clust(frogs.data.rare, dist = dist.uf, method = "ward.D2", color = "Group", palette = family.palette)
plot_clust(frogs.data.rare, dist = dist.uf, method = "ward.D", color = "Group", palette = family.palette)
plot_clust(frogs.data.rare, dist = dist.uf, method = "single", color = "Group", palette = family.palette)
plot_clust(frogs.data.rare, dist = dist.uf, method = "complete", color = "Group", palette = family.palette)
plot_clust(frogs.data.rare, dist = dist.uf, method = "median", color = "Group", palette = family.palette)
plot_clust(frogs.data.rare, dist = dist.uf, method = "average", color = "Group", palette = family.palette)
plot_clust(frogs.data.rare, dist = dist.wuf, method = "ward.D", color="Group", palette = family.palette)
plot_clust(frogs.data.rare, dist = dist.wuf, method = "single", color = "Group", palette = family.palette)
plot_clust(frogs.data.rare, dist = dist.wuf, method = "complete", color = "Group", palette = family.palette)
plot_clust(frogs.data.rare, dist = dist.wuf, method = "median", color = "Group", palette = family.palette)
plot_clust(frogs.data.rare, dist = dist.wuf, method = "average", color = "Group", palette = family.palette)
L’analyse des composants principaux permet d’extraire les informations importantes d’un tableau de données multivariées et d’exprimer ces informations sous la forme d’un ensemble de quelques nouvelles variables appelées composants principaux. L’ACP va conduire à un sous espace de plus petite dimension, tel que la projection sur ce sous-espace retienne la majeure partie de l’information.
PCA sur les 50 OTU les plus représentés
family.palette<- c(
"A" = "#09C51F",
"B" = "#8BE9F9",
"C" = "#0995C5",
"D" = "#094DC5")
#if(!require(devtools)) install.packages("devtools")
#devtools::install_github("kassambara/factoextra")
library("factoextra")
m <- as.data.frame(t(otu_table(frogs.data)))
m <- sqrt(m)
pca <- prcomp(m[colSums(m) != 0], center = TRUE, scale = TRUE)
p <- fviz_pca_biplot(pca, axes = c(1, 2), geom.ind = c("point", "text"), habillage = get_variable(frogs.data, "Group"), invisible = "quali", geom.var = c("arrow", "text"), select.var = list(contrib = 50), title = "Principal Component Analysis")
p<-p+ scale_color_manual(values = family.palette)+ scale_fill_manual(breaks = c("A","B", "C", "D"),
values=c("#09C51F","#8BE9F9", "#0995C5", "#094DC5"))
plot(p + theme_bw())
family.palette<- c(
"A" = "#09C51F",
"B" = "#8BE9F9",
"C" = "#0995C5",
"D" = "#094DC5")
p <- plot_tree(physeq = prune_taxa(names(sort(taxa_sums(frogs.data), decreasing = TRUE)[1:20]), frogs.data), method = "sampledodge", color = "Group", shape = "Group", size = "abundance", sizebase = 5, ladderize = "left", plot.margin = 0, title = "Phylogenetic tree of the 20 most abundant OTU ")
p<-p+ scale_color_manual(values = family.palette)+ scale_fill_manual(breaks = c("A","B", "C", "D"),
values=c("#09C51F","#8BE9F9", "#0995C5", "#094DC5"))
plot(p)
#Si on veut ajouter le rang taxonomique Family à la représentation:
#p <- plot_tree(physeq = prune_taxa(names(sort(taxa_sums(frogs.data), decreasing = TRUE)[1:20]), frogs.data), method = "sampledodge", color = "Groupe", shape = "Groupe", size = "abundance", label.tips = "Family", sizebase = 5, ladderize = "left", plot.margin = 0, title = "Phylogenetic tree of the 20 most abundant OTU ")
#plot(p)
DESeq : méthode de normalisation dérivées de la transcriptomique : Outils qui va chercher à estimer la variabilité biologique des différents échantillons afin de les normaliser et de calculer des ratios moyen pour chaque OTU. Il va également déterminer quels OTUs sont différentiellement exprimés en cherchant des valeurs de log2 de ratios significativement supérieures à 1 ou inférieures ? -1 et en employant pour cela le test de Wald.
Rappels: OTU / Operational Taxonomic Unit = Groupe d’individus dont les séquences 16S sont similaires à au moins 97% Abondance= nombre d’individus d’une espèce
Comparaisons entre ces 4 groupes: “A”, “B”,“C”, “D” à TF
On va chercher les OTUs différentiellement abondants entre les 6 groupes avec pval=0.05 et min.abundance=30
extract_daotus <- function(cond1, cond2, physeq, pval = 0.05) {
res <- DESeq2::results(dds, contrast=c("Group", cond1, cond2)) %>% as.data.frame() %>%
mutate(OTU = taxa_names(physeq)) %>% filter(padj < pval) %>%
inner_join(tax_table(physeq) %>% as("matrix") %>% as_tibble(rownames = "OTU"), by = "OTU") %>%
arrange(log2FoldChange)
return(res)
}
build_plotdata_daotus <- function(da.otus, physeq, min.abundance = 50) {
da.data <- rarefy_even_depth(physeq, rng = 20170329, verbose = FALSE) %>%
prune_taxa(da.otus$OTU, .)
is_abundant <- function(x) { x > min.abundance }
abundant_taxa <- genefilter_sample(da.data, is_abundant, A = 1)
da.data <- prune_taxa(abundant_taxa, da.data)
max.level <- apply(as(tax_table(da.data), "matrix"), 1, function(x) {
y <- x[!is.na(x)];
y <- y[y != ""];
y <- y[!grepl("unknown", y, ignore.case = T)]
y <- y[y != "Multi-affiliation"]
# if (names(y)[length(y)] != "Family") {
# return(paste(y["Family"], y[length(y)], sep = "/"))
#} else {
return(y[length(y)])
# }
})
otu.formatted.names <- data.frame(OTU2 = paste0(gsub(pattern = "Cluster_", "", names(max.level)), "_", max.level),
OTU = names(max.level),
stringsAsFactors = FALSE) %>%
inner_join(da.otus, by = "OTU") %>% arrange(log2FoldChange)
otu.order <- otu.formatted.names %>% pull(OTU2)
da.df <- merge(psmelt(da.data), otu.formatted.names) %>% mutate(OTU2 = factor(OTU2, levels = otu.order))
return(da.df)
}
is_present <- function(comptage) {comptage > 0} ## tester la présence d'un OTU
prevalent_taxa <- genefilter_sample(frogs.data, is_present, A = 2)
filtered.data <- prune_taxa(prevalent_taxa, frogs.data)
cds <- phyloseq_to_deseq2(filtered.data, design = ~ Group)
dds <- DESeq2::DESeq(cds)
da.otus_1 <- extract_daotus("A", "B", filtered.data, pval = 0.05)
da.otus_2 <- extract_daotus("A", "C", filtered.data, pval = 0.05)
da.otus_3 <- extract_daotus("A", "D", filtered.data, pval = 0.05)
da.otus_4 <- extract_daotus("B", "C", filtered.data, pval = 0.05)
da.otus_5 <- extract_daotus("B", "D", filtered.data, pval = 0.05)
da.otus_6 <- extract_daotus("C", "D", filtered.data, pval = 0.05)
da.otus <- bind_rows(da.otus_1,
da.otus_2,da.otus_3,
da.otus_4,da.otus_5,
da.otus_6) %>%
filter(!duplicated(OTU))
#Filtrer OTU avec pval<0,01
#da.otu<-subset(da.otus,padj<0.01)
#dim(da.otus)
Diagramme de Venn représentant la comparaison des OTUs differentiellement abondants
# da.otus_1 <- extract_daotus("A", "B", filtered.data, pval = 0.05)
# da.otus_2 <- extract_daotus("A", "C", filtered.data, pval = 0.05)
# da.otus_3 <- extract_daotus("A", "D", filtered.data, pval = 0.05)
# da.otus_4 <- extract_daotus("B", "C", filtered.data, pval = 0.05)
# da.otus_5 <- extract_daotus("B", "D", filtered.data, pval = 0.05)
# da.otus_6 <- extract_daotus("C", "D", filtered.data, pval = 0.05)
# da.otus <- bind_rows(da.otus_1,
# da.otus_2,
# da.otus_3,
# da.otus_4,
# da.otus_5,
# da.otus_6) %>%
# filter(!duplicated(OTU))
#
# venn.plot <- venn.diagram(list("A/B" = da.otus_1$OTU,
# "A/C" = da.otus_2$OTU,
# "A/D" = da.otus_3$OTU,
# "B/C" = da.otus_4$OTU,
# "B/D" = da.otus_5$OTU,
# "C/D" = da.otus_6$OTU),
# filename = NULL,cex = 3, cat.cex = 2)
# grid.newpage(); grid.draw(venn.plot)
#ggsave(venn.plot, file = "C:/Users/mmonnoye/Documents/Biom/Odorants/venn_plot.png", width = 21, height = 13)
res <- inner_join(da.otus,
## Transformer la table taxonomique en tableau avec une colonne OTU
tax_table(frogs.data) %>% as("matrix") %>% as_tibble(rownames = "OTU"),
by = "OTU")
res
## baseMean log2FoldChange lfcSE stat pvalue padj
## 1 59.208946 -9.185524 1.6709031 -5.497341 3.855611e-08 2.866004e-06
## 2 14.706752 -8.166348 2.1461900 -3.805044 1.417788e-04 3.021270e-03
## 3 13.900219 -6.898993 1.8446423 -3.740016 1.840082e-04 3.419486e-03
## 4 3.015104 -5.636569 1.2413992 -4.540497 5.612182e-06 3.128791e-04
## 5 8.076004 -5.073970 1.2781357 -3.969821 7.192677e-05 2.004959e-03
## 6 43.483700 -4.919280 1.2970466 -3.792678 1.490313e-04 3.021270e-03
## 7 5.403261 -4.538722 1.1740815 -3.865764 1.107421e-04 2.743943e-03
## 8 1.955552 -4.417891 1.2602907 -3.505454 4.558287e-04 7.819215e-03
## 9 94.281842 -4.392355 0.7552902 -5.815454 6.046960e-09 6.742360e-07
## 10 5.482008 -4.063610 1.0174795 -3.993801 6.502248e-05 2.004959e-03
## 11 375.456923 -3.959179 0.5670440 -6.982137 2.907239e-12 6.483142e-10
## 12 1.985743 -3.793471 1.2407648 -3.057365 2.232922e-03 3.556726e-02
## 13 15.309849 -3.674789 0.9010442 -4.078368 4.535301e-05 1.685620e-03
## 14 9.993880 -3.062343 0.7160035 -4.276995 1.894329e-05 8.448706e-04
## 15 12.316053 1.633146 0.5418508 3.014014 2.578160e-03 3.832865e-02
## 16 6.218214 -6.756366 2.0438980 -3.305628 9.476385e-04 1.647694e-02
## 17 3.907696 -6.194004 2.1103870 -2.935008 3.335386e-03 3.332233e-02
## 18 1.702005 -5.134653 1.2790219 -4.014515 5.956815e-05 1.856541e-03
## 19 2.235932 -4.159548 1.3855904 -3.002004 2.682088e-03 3.134690e-02
## 20 2.103234 -4.099545 1.3989900 -2.930360 3.385691e-03 3.332233e-02
## 21 5.401424 -2.781896 0.9071836 -3.066520 2.165664e-03 2.901705e-02
## 22 402.124138 -2.687610 0.8767019 -3.065591 2.172399e-03 2.901705e-02
## 23 21.308089 -2.382748 0.6978631 -3.414349 6.393464e-04 1.264485e-02
## 24 96.765775 1.758121 0.5796869 3.032880 2.422316e-03 3.593102e-02
## 25 29.106484 2.412163 0.7495958 3.217952 1.291096e-03 2.298151e-02
## 26 217.573068 26.425441 4.1489050 6.369257 1.899465e-10 1.690524e-08
## OTU Kingdom.x Phylum.x Class.x Order.x
## 1 Cluster_103 Bacteria Firmicutes Clostridia Clostridiales
## 2 Cluster_64 Bacteria Firmicutes Clostridia Clostridiales
## 3 Cluster_122 Bacteria Firmicutes Clostridia Clostridiales
## 4 Cluster_455 Bacteria Firmicutes Clostridia Clostridiales
## 5 Cluster_8 Bacteria Bacteroidetes Bacteroidia Bacteroidales
## 6 Cluster_62 Bacteria Firmicutes Clostridia Clostridiales
## 7 Cluster_290 Bacteria Firmicutes Clostridia Clostridiales
## 8 Cluster_744 Bacteria Firmicutes Clostridia Clostridiales
## 9 Cluster_25 Bacteria Firmicutes Clostridia Clostridiales
## 10 Cluster_383 Bacteria Firmicutes Clostridia Clostridiales
## 11 Cluster_12 Bacteria Bacteroidetes Bacteroidia Bacteroidales
## 12 Cluster_236 Bacteria Firmicutes Clostridia Clostridiales
## 13 Cluster_159 Bacteria Firmicutes Clostridia Clostridiales
## 14 Cluster_162 Bacteria Firmicutes Clostridia Clostridiales
## 15 Cluster_160 Bacteria Firmicutes Clostridia Clostridiales
## 16 Cluster_70 Bacteria Firmicutes Clostridia Clostridiales
## 17 Cluster_978 Bacteria Firmicutes Clostridia Clostridiales
## 18 Cluster_1110 Bacteria Bacteroidetes Bacteroidia Bacteroidales
## 19 Cluster_902 Bacteria Bacteroidetes Bacteroidia Bacteroidales
## 20 Cluster_42 Bacteria Bacteroidetes Bacteroidia Bacteroidales
## 21 Cluster_205 Bacteria Firmicutes Clostridia Clostridiales
## 22 Cluster_5 Bacteria Bacteroidetes Bacteroidia Bacteroidales
## 23 Cluster_95 Bacteria Firmicutes Clostridia Clostridiales
## 24 Cluster_28 Bacteria Bacteroidetes Bacteroidia Bacteroidales
## 25 Cluster_78 Bacteria Firmicutes Clostridia Clostridiales
## 26 Cluster_46 Bacteria Firmicutes Clostridia Clostridiales
## Family.x Genus.x Species.x
## 1 Clostridiales vadinBB60 group Unknown Unknown
## 2 Lachnospiraceae Unknown Unknown
## 3 Lachnospiraceae GCA-900066575 Unknown
## 4 Ruminococcaceae Unknown Unknown
## 5 Muribaculaceae Unknown Unknown
## 6 Clostridiales vadinBB60 group Unknown Unknown
## 7 Lachnospiraceae Unknown Unknown
## 8 Lachnospiraceae Unknown Unknown
## 9 Lachnospiraceae Lachnospiraceae NK4A136 group Unknown
## 10 Lachnospiraceae Unknown Unknown
## 11 Muribaculaceae Unknown Unknown
## 12 Lachnospiraceae Unknown Unknown
## 13 Lachnospiraceae Unknown Unknown
## 14 Lachnospiraceae Unknown Unknown
## 15 Lachnospiraceae Unknown Unknown
## 16 Lachnospiraceae Marvinbryantia Unknown
## 17 Lachnospiraceae Unknown Unknown
## 18 Muribaculaceae Unknown Unknown
## 19 Muribaculaceae Unknown Unknown
## 20 Muribaculaceae Unknown Unknown
## 21 Ruminococcaceae Butyricicoccus Unknown
## 22 Prevotellaceae Alloprevotella Unknown
## 23 Lachnospiraceae Lachnospiraceae NK4A136 group Unknown
## 24 Muribaculaceae Unknown Unknown
## 25 Lachnospiraceae Lachnospiraceae NK4A136 group Unknown
## 26 Ruminococcaceae Ruminococcaceae UCG-014 Unknown
## Kingdom.y Phylum.y Class.y Order.y
## 1 Bacteria Firmicutes Clostridia Clostridiales
## 2 Bacteria Firmicutes Clostridia Clostridiales
## 3 Bacteria Firmicutes Clostridia Clostridiales
## 4 Bacteria Firmicutes Clostridia Clostridiales
## 5 Bacteria Bacteroidetes Bacteroidia Bacteroidales
## 6 Bacteria Firmicutes Clostridia Clostridiales
## 7 Bacteria Firmicutes Clostridia Clostridiales
## 8 Bacteria Firmicutes Clostridia Clostridiales
## 9 Bacteria Firmicutes Clostridia Clostridiales
## 10 Bacteria Firmicutes Clostridia Clostridiales
## 11 Bacteria Bacteroidetes Bacteroidia Bacteroidales
## 12 Bacteria Firmicutes Clostridia Clostridiales
## 13 Bacteria Firmicutes Clostridia Clostridiales
## 14 Bacteria Firmicutes Clostridia Clostridiales
## 15 Bacteria Firmicutes Clostridia Clostridiales
## 16 Bacteria Firmicutes Clostridia Clostridiales
## 17 Bacteria Firmicutes Clostridia Clostridiales
## 18 Bacteria Bacteroidetes Bacteroidia Bacteroidales
## 19 Bacteria Bacteroidetes Bacteroidia Bacteroidales
## 20 Bacteria Bacteroidetes Bacteroidia Bacteroidales
## 21 Bacteria Firmicutes Clostridia Clostridiales
## 22 Bacteria Bacteroidetes Bacteroidia Bacteroidales
## 23 Bacteria Firmicutes Clostridia Clostridiales
## 24 Bacteria Bacteroidetes Bacteroidia Bacteroidales
## 25 Bacteria Firmicutes Clostridia Clostridiales
## 26 Bacteria Firmicutes Clostridia Clostridiales
## Family.y Genus.y Species.y
## 1 Clostridiales vadinBB60 group Unknown Unknown
## 2 Lachnospiraceae Unknown Unknown
## 3 Lachnospiraceae GCA-900066575 Unknown
## 4 Ruminococcaceae Unknown Unknown
## 5 Muribaculaceae Unknown Unknown
## 6 Clostridiales vadinBB60 group Unknown Unknown
## 7 Lachnospiraceae Unknown Unknown
## 8 Lachnospiraceae Unknown Unknown
## 9 Lachnospiraceae Lachnospiraceae NK4A136 group Unknown
## 10 Lachnospiraceae Unknown Unknown
## 11 Muribaculaceae Unknown Unknown
## 12 Lachnospiraceae Unknown Unknown
## 13 Lachnospiraceae Unknown Unknown
## 14 Lachnospiraceae Unknown Unknown
## 15 Lachnospiraceae Unknown Unknown
## 16 Lachnospiraceae Marvinbryantia Unknown
## 17 Lachnospiraceae Unknown Unknown
## 18 Muribaculaceae Unknown Unknown
## 19 Muribaculaceae Unknown Unknown
## 20 Muribaculaceae Unknown Unknown
## 21 Ruminococcaceae Butyricicoccus Unknown
## 22 Prevotellaceae Alloprevotella Unknown
## 23 Lachnospiraceae Lachnospiraceae NK4A136 group Unknown
## 24 Muribaculaceae Unknown Unknown
## 25 Lachnospiraceae Lachnospiraceae NK4A136 group Unknown
## 26 Ruminococcaceae Ruminococcaceae UCG-014 Unknown
#write.xlsx(res,"Abondances différentielles globales TF.xlsx",row.names=TRUE,col.names = TRUE, sep="\t",dec=",",na=" ")
#write.table(res, "Abondances différentielles globales TF.csv", row.names=FALSE, sep="\t",dec=",", na=" ")
TABLEAU CI DESSUS AU FORMAT EXCEL DANS LE FICHIER “TABLEAU DE VALEURS” JOINT AVEC CE DOCUMENT
Représentation de la table d’abondance. Montre les intéractions entre (les groupes) de taxa et (les groupes) d’échantillons. Elle essaie de trouver un ordre des echantillons et des OTU pour faire sortir une info interessante.
## On rajoute une fausse colonne nomm?e "my_rank" dans la table taxonomique dans laquelle on a concaténé nom de famille et nom de cluster
my.da.otus <- frogs.data.rare
tdf <- tax_table(frogs.data.rare)
tdf <- cbind(tdf, my_rank = paste0(tdf[, "Family"], " (", taxa_names(frogs.data.rare), ")"))
tax_table(my.da.otus) <- tdf
#Ensuite tu peux appeler plot_heatmap sur my.data.otus comme avant en remplaçant taxa.label = "Family" par taxa.label = #"my_rank"
#Script ci dessous pour avoir ordre des échantillons par ordre croissant:
#p<-plot_heatmap(prune_taxa(da.otus$OTU,my.da.otus) , taxa.order = da.otus$OTU,sample.order=sample_names(my.da.otus),low #= "yellow", high = "red", na.value = "white", title = "DAOTUs",taxa.label = "my_rank")+facet_grid(". ~ Groupe", scales #= "free_x")
#p<-p+ theme(axis.text.x = element_text(color="black", size=10, angle=90),axis.text.y = element_text(color="black", #size=10))
#plot(p)
#Script avec ordre pour info interessante et non pas ordre grandeur sample na.value= valeur manquante
p<-plot_heatmap(prune_taxa(da.otus$OTU,my.da.otus) ,low = "yellow", high = "red", na.value = "white", title = "DAOTUs TF",taxa.label = "my_rank")+facet_grid(". ~ Group", scales = "free_x")
p<-p+ theme(axis.text.x = element_text(color="black", size=10, angle=90),axis.text.y = element_text(color="black", size=10))
plot(p)
#ggplotly(p)
#Figure 8: Heatmap avec dendrogram et clustering sample
#library( "genefilter" )
#topVarGenes <- head( order( rowVars( da.otus ), decreasing=TRUE ), 35 ) #35 sample avec variance la plus haute
#heatmap.2( assay(rld)[ topVarGenes, ], scale="row",
# trace="none", dendrogram="column",
# col = colorRampPalette( rev(brewer.pal(9, "RdBu")) )(255))
# p <- plot_heatmap(prune_taxa(names(sort(taxa_sums(data.rare), decreasing = TRUE)[1:49]), data.rare), distance = "bray", method = "MDS", low = "yellow", high = "red", na.value = "white", title = "Taxa heatmap by samples",taxa.label = "Genus")
# p <- p + facet_grid(". ~ Group", scales = "free_x")
#
# plot(p)
# p <- plot_heatmap(prune_taxa(names(sort(taxa_sums(data.rare), decreasing = TRUE)[1:49]), data.rare), distance = "bray", method = "MDS", low = "yellow", high = "red", na.value = "white", title = "Taxa heatmap by samples",taxa.label = "Family")
# p <- p + facet_grid(". ~ Type", scales = "free_x")
#
# plot(p)
La fonction build_plotdata_daotus ne conserve que les OTUs qui ont un nombre total de reads supérieur à 30. C’est pour se débarasser des OTUs dont l’abondance est 0 dans le groupe A et éventuellement 2 ou 3 dans le groupe B. Ils sont statistiquement significatifs mais potentiellement pas très intéressants.
plotdata <- build_plotdata_daotus(da.otus, filtered.data, min.abundance = 50)
p <- ggplot(plotdata, aes(x = Group, y = Abundance)) + geom_boxplot(aes(group = Group, fill = Group)) + theme_bw() + theme(legend.position = "none") + facet_wrap(~OTU2, ncol = 4, scales = "free_y")+ theme(axis.text.x = element_text(angle=90,vjust = 1,hjust = 1,size=15)) + stat_compare_means(method = "anova", hide.ns = TRUE,vjust = 0.5) # https://www.datanovia.com/en/fr/lessons/anova-dans-r/ Site sur l'anova et test post hoc
family.palette<- c(
"A" = "#09C51F",
"B" = "#8BE9F9",
"C" = "#0995C5",
"D" = "#094DC5")
p<-p+scale_color_manual(values = family.palette)+ scale_fill_manual(breaks = c( "A","B", "C", "D"),
values=c("#09C51F","#8BE9F9", "#0995C5", "#094DC5"))
#ggplotly(p)
plot(p)
OTUs différentiellement abondants entre 2 régimes.
#Fonctions auxiliaires g?n?riques qui sont utilis?es pour toutes les analyses (?vite la r?p?titon de blocs de code)
## Extraction des OTUs diff?rentiels et ajout des informations taxonomiques
extract_daotus <- function(cond1, cond2, physeq, pval = 0.05) { #CHANGE IF YOU WANT
res <- DESeq2::results(dds, contrast=c("Group", cond1, cond2)) %>% as.data.frame() %>%
mutate(OTU = taxa_names(physeq)) %>% filter(padj < pval) %>%
inner_join(tax_table(physeq) %>% as("matrix") %>% as_tibble(rownames = "OTU"), by = "OTU") %>%
arrange(log2FoldChange)
return(res)
}
## Construction du tableau de donn?es pour repr?sentation graphique
build_plotdata_daotus <- function(da.otus, physeq, min.abundance = 50) { #CHANGE IF YOU WANT
da.data <- rarefy_even_depth(physeq, rng = 20170329, verbose = FALSE) %>%
prune_taxa(da.otus$OTU, .)
is_abundant <- function(x) { x > min.abundance }
abundant_taxa <- genefilter_sample(da.data, is_abundant, A = 1)
da.data <- prune_taxa(abundant_taxa, da.data)
max.level <- apply(as(tax_table(da.data), "matrix"), 1, function(x) {
y <- x[!is.na(x)];
y <- y[y != ""];
y <- y[!grepl("unknown", y, ignore.case = T)]
y <- y[y != "Multi-affiliation"]
# if (names(y)[length(y)] != "Family") {
# return(paste(y["Family"], y[length(y)], sep = "/"))
#} else {
return(y[length(y)])
# }
})
otu.formatted.names <- data.frame(OTU2 = paste0(gsub(pattern = "Cluster_", "", names(max.level)), "_", max.level),
OTU = names(max.level),
stringsAsFactors = FALSE) %>%
inner_join(da.otus, by = "OTU") %>% arrange(log2FoldChange)
otu.order <- otu.formatted.names %>% pull(OTU2)
da.df <- merge(psmelt(da.data), otu.formatted.names) %>% mutate(OTU2 = factor(OTU2, levels = otu.order))
return(da.df)
}
## Fonction de heatmap
plot_da_heatmap <- function(da.otus, physeq) {
## Build plotdata and order OTU according to lfc
plotdata <- build_plotdata_daotus(da.otus, physeq, 0) %>%
mutate(OTU = factor(OTU, levels = da.otus$OTU))
## Build vector label
labvec <- with(da.otus, paste0(Family, " (", OTU, ")"))
names(labvec) <- da.otus$OTU
## Automatic text size
text_size <- function(n, mins = 8, maxs = 30, B = 12, D = 100) {
s <- B * exp(-n/D)
s <- ifelse(s > mins, s, mins)
s <- ifelse(s < maxs, s, maxs)
return(s)
}
ggplot(plotdata) + aes(x = Sample, y = OTU, fill = Abundance) +
geom_tile() +
facet_grid(~ Groupe, scales = "free_x") +
labs(y = NULL, x = "Sample") +
scale_y_discrete(labels = labvec) +
scale_fill_gradient(low = "yellow", high = "red", na.value = "white", trans = scales::log_trans(4)) +
theme(axis.text.x = element_text(angle = 270, size = text_size(nsamples(physeq))),
axis.text.y = element_text(size = text_size(nrow(da.otus))))
}
plot_da_boxplot <- function(plotdata) {
ggplot(plotdata, aes(x = Group, y = Abundance)) +
geom_boxplot(aes(group = Group, fill = Group)) +
theme_bw() +
theme(legend.position = "none") +
facet_wrap(~OTU2, ncol = 3, scales = "free_y") +
geom_text(aes(y = Inf, x=Inf, label = paste0("p = ", format(pvalue, digits = 4))), size=4,
hjust = 1, vjust = 1)
}
## Function pour le graphique composite
plot_da_composite <- function(da.otus, physeq, labvec = NULL, lfc.threshold = 3, min.abundance = 50) { #CHANGE IF YOU WANT
## Custome theme #on peut mettre 3
custom.theme <- theme_bw() +
theme(panel.spacing = unit(0.2, "lines"),
axis.title.y = element_blank(),
axis.ticks.y = element_blank(),
axis.text.y = element_blank(),
strip.text.y = element_blank())
## Plotdata
plotdata <- build_plotdata_daotus(da.otus, physeq, min.abundance = min.abundance) %>%
filter(abs(log2FoldChange) >= lfc.threshold) %>%
arrange(log2FoldChange)
## Correct OTU and Family Order
otu.order <- da.otus %>% pull(OTU)
fam.order <- da.otus %>% select(Phylum, Family) %>% unique() %>% arrange(Phylum, Family) %>% pull(Family)
plotdata <- plotdata %>%
mutate(OTU = factor(OTU, levels = otu.order),
Family = factor(Family, levels = fam.order),
OTU.label = if_else(Genus != "Unknown", Genus, NA_character_))
## Annotation data
annotate.data <- plotdata %>% group_by(Family) %>%
summarize(OTU = length(unique(OTU))) %>%
mutate(label = factor(Family, levels = fam.order),
log2FoldChange = min(plotdata$log2FoldChange) * 1.1
## log2FoldChange = -Inf
)
## Plots
p.lfc <- ggplot(plotdata, aes(y = OTU, x = log2FoldChange, color = Family)) +
geom_point() +
geom_vline(xintercept = 0, linetype = 2) +
facet_grid(Family~., space = "free_y", scale = "free_y", switch = "both") +
geom_text(data = annotate.data, aes(label = label), hjust = 0, vjust = 1) + #vjust=1 c'est le point au dessus du nom family
geom_text(aes(label = OTU.label, hjust="inward", vjust=1),size=3.5) #size=3 taille nom family de couleur
#vjust=0.5 Noms family à coté point
p.evidence <- ggplot(plotdata, aes(x = OTU, y = -log10(padj), fill = Family)) + geom_bar(stat = "identity") +
facet_grid(Family~., space = "free_y", scale = "free_y") +
coord_flip() + ylab("Evidence")
if (is.null(labvec)) {
labvec <- setNames(nm = unique(plotdata$Group))
}
hm.data <- plotdata %>% group_by_at(vars(-Sample, -Name, -Abundance)) %>%
summarize(Abundance = mean(Abundance)) %>%
ungroup() %>%
mutate(OTU = factor(OTU, levels = otu.order),
Family = factor(Family, levels =fam.order),
Group = factor(Group, levels = names(labvec), labels = labvec))
p.hm <- ggplot(hm.data, aes(y = OTU, x = Group, fill = Abundance)) + geom_tile() +
facet_grid(Family~., space = "free_y", scale = "free_y") + #Heatmap séparée par Family
scale_fill_gradient(trans = scales::log_trans(base = 2), na.value = "white",
low = "white", high = "black")
p.lfc.2 <- p.lfc
p.lfc.2$layers[[3]] <- NULL
p1 <- arrangeGrob(p.lfc.2 + custom.theme +
theme(legend.position = "none",
strip.text.y = element_text( angle = 180,hjust = 1, size = 10), #Noms Family sur la gauche
strip.background = element_blank()),
p.evidence + custom.theme +
scale_x_discrete(position = "top") +
theme(legend.position = "none",
axis.text.y = element_text(size = 8)),
p.hm + custom.theme,
widths = c(0.47, 0.38, 0.15), #taille largeur des 3 cadres doit être =1
nrow = 1,
ncol = 3)
grid::grid.newpage()
grid::grid.draw(p1)
invisible(p1)
}
##############################################################
######## graphique composite 2 ###############################
format_names <- function(physeq, min.rank = 0) {
tax <- as(tax_table(physeq), "matrix")
tax[tax %in% c("Unknown", "Multi-Affiliation")] <- NA
final_rank <- function(x) {
x <- x[!is.na(x)]
if (length(x) <= min.rank) {
return("")
} else {
tail(x, 1)
}
}
apply(tax, 1, final_rank)
}
## Function pour le graphique composite
plot_da_compositebis <- function(da.otus, physeq, labvec = NULL, lfc.threshold = 3, min.abundance = 50) { #CHANGE IF YOU WANT
## Custome theme #on peut mettre 3 ou 2
custom.theme <- theme_bw() +
theme(panel.spacing = unit(0.2, "lines"),
axis.title.y = element_blank(),
axis.ticks.y = element_blank(),
axis.text.y = element_blank(),
strip.text.y = element_blank(),
axis.text.x = element_text(size = 10)) #taille noms groupes heatmap
## Plotdata
plotdata <- build_plotdata_daotus(da.otus, physeq, min.abundance = min.abundance) %>%
inner_join(tax_table(data) %>% as("matrix") %>% as.data.frame() %>%
mutate(OTU = taxa_names(data),
OTU.label = format_names(data)),
by = "OTU") %>%
filter(abs(log2FoldChange) >= 3) %>% #On peut changer la valeur du log2FoldChange =3 ou 2 #CHANGE IF YOU WANT
arrange(log2FoldChange)
##### Correct OTU and Family Order Ajout Mahendra
otu.order <- da.otus %>% pull(OTU)
fam.order <- da.otus %>% select(Phylum, Family) %>% unique() %>% arrange(Phylum, Family) %>% pull(Family)
plotdata <- plotdata %>%
## Ce que tu veux rajouter
inner_join(tax_table(data) %>% as("matrix") %>% as.data.frame() %>%
mutate(OTU = taxa_names(data),
OTU.label = format_names(data),
OTU.label.2 = format_names(data, min.rank = 5)),
by = "OTU") %>%
## Tri des OTUs par log fold change décroissant et des familles par ordre alphabétique au sein des phylums
mutate(OTU = factor(OTU, levels = otu.order),
Family = factor(Family, levels = fam.order))
## Plots
#br <- seq(-20, 10, by = 2)
#p.lfc <- ggplot(plotdata, aes(y = OTU, x = 2^log2FoldChange, color = Family)) +
#geom_point(aes(size = 1)) + geom_vline(xintercept = 1, linetype = 2) + #size=1 taille point et noms family
#geom_text(aes(x = 2^(log2FoldChange - .25*sign(log2FoldChange)),
# label = OTU.label.2, size = 1, #legende des tests PCR
# hjust = if_else(log2FoldChange > 0, 1, 0))) +
# scale_color_brewer(palette = "Paired") + #scale_color_brewer(palette="Paired") si plus de 10 Family
# scale_x_continuous(trans = "log2", breaks = 2^br,
# labels = c(paste0("1/", 2^-br[br <0]), 2^br[br>= 0]),
# name = "Fold change (log scale)") +
#scale_y_discrete(position = "right")
#library(paletteer)
p.lfc <- ggplot(plotdata, aes(y = OTU, x = log2FoldChange, color = Family)) +
geom_point(size=5) + geom_vline(xintercept = 0, linetype = 2) + #size=1 taille point et noms family
geom_text(aes(x = log2FoldChange - .25*sign(log2FoldChange),
label = OTU.label.2, vjust=0.5, #Taille et position noms family à côté des points
hjust = if_else(log2FoldChange > 0, 1, 0)))+
# scale_color_paletteer_d("bpalette")+
scale_color_brewer(palette = "Paired") + #scale_color_brewer(palette="Paired") si plus de 10 Family
scale_y_discrete(position = "right")
# + scale_x_continuous(limits = c(-15, 35)) ######## pour changer echelle axe des x (log2foldchange)
#Heatmap
hm.data <- plotdata %>% group_by_at(vars(-Sample, -Name, -Abundance)) %>%
summarize(Abundance = mean(Abundance)) %>%
ungroup() %>%
mutate(OTU = factor(OTU, levels = otu.order),
Family = factor(Family, levels =fam.order),
Group = factor(Group, levels = names(labvec), labels = labvec))
p.hm <- ggplot(hm.data, aes(y = OTU, x = Group, fill = Abundance)) + geom_tile() +
scale_fill_gradient(trans = log_trans(base = 2), na.value = "white",
low = "white", high = "black")
#
p2 <- arrangeGrob(p.lfc + custom.theme + guides(colour = guide_legend(override.aes = list(size=5)))+ #taille points de la legende
theme(legend.position = c(0.025, 0.975), ########### OU theme(legend.position = c(0.575, 0.425) ##################
legend.justification = c(0, 1),
legend.text = element_text(size = 16), #taille noms legende
legend.background = element_rect(fill = "transparent"),
#axis.text.y = element_text(size = 9, hjust = 0.5), #Affichage noms des clusters
axis.text.x = element_text(size = 10)), #taille noms legende axe des x
p.hm + custom.theme, #p.hm défini la heatmap
widths = c(0.80, 0.20), #largeur heatmap
nrow = 1,
ncol = 2)
grid::grid.newpage()
grid::grid.draw(p2)
invisible(p2)
}
############## Function pour le graphique composite TER
plot_da_compositeter <- function(da.otus, physeq, labvec = NULL, lfc.threshold = 3, min.abundance = 50) { #CHANGE IF YOU WANT
## Custome theme #on peut mettre 3 ou 2
custom.theme <- theme_bw() +
theme(panel.spacing = unit(0.2, "lines"),
axis.title.y = element_blank(),
axis.ticks.y = element_blank(),
axis.text.y = element_blank(),
strip.text.y = element_blank(),
axis.text.x = element_text(size = 10)) #taille noms groupes heatmap
## Plotdata
plotdata <- build_plotdata_daotus(da.otus, physeq, min.abundance = min.abundance) %>%
inner_join(tax_table(data) %>% as("matrix") %>% as.data.frame() %>%
mutate(OTU = taxa_names(data),
OTU.label = format_names(data)),
by = "OTU") %>%
filter(abs(log2FoldChange) >= 3) %>% #On peut changer la valeur du log2FoldChange =3 ou 2 #CHANGE IF YOU WANT
arrange(log2FoldChange)
##### Correct OTU and Family Order Ajout Mahendra
otu.order <- da.otus %>% pull(OTU)
fam.order <- da.otus %>% select(Phylum, Family) %>% unique() %>% arrange(Phylum, Family) %>% pull(Family)
plotdata <- plotdata %>%
## Ce que tu veux rajouter
inner_join(tax_table(data) %>% as("matrix") %>% as.data.frame() %>%
mutate(OTU = taxa_names(data),
OTU.label = format_names(data),
OTU.label.2 = format_names(data, min.rank = 5)),
by = "OTU") %>%
## Tri des OTUs par log fold change décroissant et des familles par ordre alphabétique au sein des phylums
mutate(OTU = factor(OTU, levels = otu.order),
Family = factor(Family, levels = fam.order))
## Plots
#br <- seq(-20, 10, by = 2)
#p.lfc <- ggplot(plotdata, aes(y = OTU, x = 2^log2FoldChange, color = Family)) +
#geom_point(aes(size = 1)) + geom_vline(xintercept = 1, linetype = 2) + #size=1 taille point et noms family
#geom_text(aes(x = 2^(log2FoldChange - .25*sign(log2FoldChange)),
# label = OTU.label.2, size = 1, #legende des tests PCR
# hjust = if_else(log2FoldChange > 0, 1, 0))) +
# scale_color_brewer(palette = "Paired") + #scale_color_brewer(palette="Paired") si plus de 10 Family
# scale_x_continuous(trans = "log2", breaks = 2^br,
# labels = c(paste0("1/", 2^-br[br <0]), 2^br[br>= 0]),
# name = "Fold change (log scale)") +
#scale_y_discrete(position = "right")
#library(paletteer)
p.lfc <- ggplot(plotdata, aes(y = OTU, x = log2FoldChange, color = Family)) +
geom_point(size=5) + geom_vline(xintercept = 0, linetype = 2) + #size=1 taille point et noms family
geom_text(aes(x = log2FoldChange - .25*sign(log2FoldChange),
label = OTU.label.2, vjust=0.5, #Taille et position noms family à côté des points
hjust = if_else(log2FoldChange > 0, 1, 0)))+
# scale_color_paletteer_d("bpalette")+
scale_color_brewer(palette = "Paired") + #scale_color_brewer(palette="Paired") si plus de 10 Family
scale_y_discrete(position = "right") + scale_x_continuous(limits = c(-15, 30)) ######## pour changer echelle axe des x (log2foldchange)
#Heatmap
hm.data <- plotdata %>% group_by_at(vars(-Sample, -Name, -Abundance)) %>%
summarize(Abundance = mean(Abundance)) %>%
ungroup() %>%
mutate(OTU = factor(OTU, levels = otu.order),
Family = factor(Family, levels =fam.order),
Group = factor(Group, levels = names(labvec), labels = labvec))
p.hm <- ggplot(hm.data, aes(y = OTU, x = Group, fill = Abundance)) + geom_tile() +
scale_fill_gradient(trans = log_trans(base = 2), na.value = "white",
low = "white", high = "black")
#
p2 <- arrangeGrob(p.lfc + custom.theme + guides(colour = guide_legend(override.aes = list(size=5)))+ #taille points de la legende
theme(legend.position = c(0.575, 0.525), ########### OU theme(legend.position = c(0.575, 0.425) ##################
legend.justification = c(0, 1),
legend.text = element_text(size = 16), #taille noms legende
legend.background = element_rect(fill = "transparent"),
#axis.text.y = element_text(size = 9, hjust = 0.5), #Affichage noms des clusters
axis.text.x = element_text(size = 10)), #taille noms legende axe des x
p.hm + custom.theme, #p.hm défini la heatmap
widths = c(0.80, 0.20), #largeur heatmap
nrow = 1,
ncol = 2)
grid::grid.newpage()
grid::grid.draw(p2)
invisible(p2)
}
################################################################
## tester la présence d'un OTU
is_present <- function(comptage) {comptage > 0}
On répète l’analyse d’abondance différentielle précédente en ne prenant que les échantillons “Chow” et “HFPalm” en utilisant la même p-valeur ajustée, pval=0.05
Nous avons 12 échantillons.
On commence par sélectionner les échantillons
data <- frogs.data %>% subset_samples(Group %in% c("A","B"))
Puis on ne conserve que les taxas prévalents
prevalent_taxa <- genefilter_sample(data, is_present, A = 2) #25% des echantillons
filtered.data <- prune_taxa(prevalent_taxa, data)
On fait ensuite l’analyse d’abondance différentielle avec DESeq2
cds <- phyloseq_to_deseq2(filtered.data, design = ~ Group)
dds <- DESeq2::DESeq(cds)
da.otus <- extract_daotus("A","B", filtered.data, pval = 0.05)
da.otus
## baseMean log2FoldChange lfcSE stat pvalue padj
## 1 46.271680 -9.075573 1.4450227 -6.280575 3.373236e-10 2.951582e-08
## 2 18.863758 -7.296419 1.5552303 -4.691536 2.711614e-06 1.186331e-04
## 3 13.224601 -6.781594 1.6302812 -4.159769 3.185691e-05 9.291598e-04
## 4 7.717996 -6.492563 1.9217697 -3.378429 7.290119e-04 9.112648e-03
## 5 8.111684 -6.070274 1.8941783 -3.204701 1.352030e-03 1.575652e-02
## 6 3.929734 -5.520146 1.2298744 -4.488382 7.176625e-06 2.511819e-04
## 7 10.397043 -4.937106 1.3096854 -3.769689 1.634512e-04 3.575495e-03
## 8 39.622790 -4.821684 1.3860675 -3.478679 5.038924e-04 7.348430e-03
## 9 3.684448 -4.417765 1.5201683 -2.906103 3.659614e-03 3.049678e-02
## 10 2.390621 -4.300409 1.1653326 -3.690285 2.240033e-04 4.355621e-03
## 11 74.521857 -4.289727 0.8135819 -5.272643 1.344729e-07 7.844251e-06
## 12 3.876456 -3.902242 1.2246589 -3.186391 1.440596e-03 1.575652e-02
## 13 168.417425 -3.841147 0.5456952 -7.038997 1.936282e-12 3.388494e-10
## 14 2.332818 -3.680933 1.1886545 -3.096723 1.956727e-03 1.902374e-02
## 15 19.626578 -3.525414 0.9298623 -3.791329 1.498432e-04 3.575495e-03
## 16 7.923295 -2.983064 0.8671563 -3.440054 5.815992e-04 7.829220e-03
## 17 279.087246 -1.659972 0.4682697 -3.544905 3.927539e-04 6.873194e-03
## 18 17.017952 1.748375 0.6144782 2.845300 4.436959e-03 3.529399e-02
## 19 75.707869 1.921791 0.6142047 3.128909 1.754568e-03 1.806173e-02
## 20 166.331913 2.336232 0.6645127 3.515707 4.385846e-04 6.977483e-03
## 21 90.947920 3.075570 1.0302121 2.985376 2.832304e-03 2.608701e-02
## 22 10.935456 3.763516 1.2855825 2.927480 3.417215e-03 2.990063e-02
## OTU Kingdom Phylum Class Order
## 1 Cluster_103 Bacteria Firmicutes Clostridia Clostridiales
## 2 Cluster_64 Bacteria Firmicutes Clostridia Clostridiales
## 3 Cluster_122 Bacteria Firmicutes Clostridia Clostridiales
## 4 Cluster_288 Bacteria Firmicutes Clostridia Clostridiales
## 5 Cluster_91 Bacteria Firmicutes Clostridia Clostridiales
## 6 Cluster_455 Bacteria Firmicutes Clostridia Clostridiales
## 7 Cluster_8 Bacteria Bacteroidetes Bacteroidia Bacteroidales
## 8 Cluster_62 Bacteria Firmicutes Clostridia Clostridiales
## 9 Cluster_290 Bacteria Firmicutes Clostridia Clostridiales
## 10 Cluster_744 Bacteria Firmicutes Clostridia Clostridiales
## 11 Cluster_25 Bacteria Firmicutes Clostridia Clostridiales
## 12 Cluster_383 Bacteria Firmicutes Clostridia Clostridiales
## 13 Cluster_12 Bacteria Bacteroidetes Bacteroidia Bacteroidales
## 14 Cluster_236 Bacteria Firmicutes Clostridia Clostridiales
## 15 Cluster_159 Bacteria Firmicutes Clostridia Clostridiales
## 16 Cluster_162 Bacteria Firmicutes Clostridia Clostridiales
## 17 Cluster_5 Bacteria Bacteroidetes Bacteroidia Bacteroidales
## 18 Cluster_160 Bacteria Firmicutes Clostridia Clostridiales
## 19 Cluster_40 Bacteria Firmicutes Clostridia Clostridiales
## 20 Cluster_39 Bacteria Firmicutes Clostridia Clostridiales
## 21 Cluster_76 Bacteria Firmicutes Clostridia Clostridiales
## 22 Cluster_351 Bacteria Firmicutes Clostridia Clostridiales
## Family Genus Species
## 1 Clostridiales vadinBB60 group Unknown Unknown
## 2 Lachnospiraceae Unknown Unknown
## 3 Lachnospiraceae GCA-900066575 Unknown
## 4 Lachnospiraceae Lachnospiraceae NK4A136 group Unknown
## 5 Ruminococcaceae Ruminococcaceae UCG-014 Unknown
## 6 Ruminococcaceae Unknown Unknown
## 7 Muribaculaceae Unknown Unknown
## 8 Clostridiales vadinBB60 group Unknown Unknown
## 9 Lachnospiraceae Unknown Unknown
## 10 Lachnospiraceae Unknown Unknown
## 11 Lachnospiraceae Lachnospiraceae NK4A136 group Unknown
## 12 Lachnospiraceae Unknown Unknown
## 13 Muribaculaceae Unknown Unknown
## 14 Lachnospiraceae Unknown Unknown
## 15 Lachnospiraceae Unknown Unknown
## 16 Lachnospiraceae Unknown Unknown
## 17 Prevotellaceae Alloprevotella Unknown
## 18 Lachnospiraceae Unknown Unknown
## 19 Lachnospiraceae Blautia Unknown
## 20 Lachnospiraceae Unknown Unknown
## 21 Lachnospiraceae Unknown Unknown
## 22 Lachnospiraceae Lachnospiraceae UCG-006 Unknown
#write.xlsx(da.otus,"TF Chow HFPalm.xlsx",row.names=TRUE,col.names = TRUE, sep="\t",dec=",",na=" ")
#write.table(da.otus, "Control et Ind_Inf_Int.csv", row.names=FALSE, sep="\t",dec=",", na=" ")
TABLEAU CI DESSUS AU FORMAT EXCEL DANS LE FICHIER “TABLEAU DE VALEURS” JOINT AVEC CE DOCUMENT
Le nombre d’OTUs DA en fonction de la p-valeur est represente ci-dessous:
ggplot(da.otus %>% arrange(padj) %>% mutate(Nb = 1:n()),
aes(x = padj, y = Nb)) +
geom_point() +
scale_x_log10(breaks = 10^(-seq(2, 41, 3))) +
labs(x = "p valeur ajustée", y = "Nombre d'OTUs DA")
plot_da_heatmap(da.otus, data) + ggtitle("DAOTUs[A-B] ")
plotdata <- build_plotdata_daotus(da.otus, data, min.abundance = 50)
plot_da_boxplot(plotdata)
Représentation en boxplot des OTUs differentiellements abondants entre les groupe HFD et HFD_Antibiotiques en fonction du Fold Change
plotdata<-inner_join(da.otus,psmelt(data))%>%arrange(log2FoldChange)%>%filter(Group%in%c("A","B"))
ggplot(plotdata,aes(x=Group,y=Abundance,fill=Phylum,color=Phylum,alpha=Group))+scale_y_log10()+scale_alpha_discrete(range=c(0,1),guide=guide_legend(override.aes = list(fill="black")))+facet_wrap(~Phylum)+scale_y_log10()+geom_boxplot()
# Ou ceci:
#plotdata<-inner_join(res,psmelt(frogs.data.rare))%>%arrange(log2FoldChange)%>%filter(Groupe%in%c("TEMOIN", "COLZA_SOJA"))
#ggplot(plotdata,aes(x=Groupe,y=Abundance))+facet_wrap(~Phylum)+scale_y_log10()+geom_boxplot(aes(group = Groupe, fill = Groupe))
plotdata<-inner_join(da.otus,psmelt(data))%>%arrange(log2FoldChange)%>%filter(Group%in%c("A","B"))
ggplot(plotdata,aes(x=Group,y=Abundance,fill=Phylum,color=Phylum,alpha=Group))+scale_y_log10()+scale_alpha_discrete(range=c(0,1),guide=guide_legend(override.aes = list(fill="black")))+facet_wrap(~Family)+scale_y_log10()+geom_boxplot()
p<-ggplot(plotdata,aes(x=OTU,y=Abundance,fill=Phylum,color=Phylum,alpha=Group))+scale_y_log10()+scale_alpha_discrete(range=c(0,1),guide=guide_legend(override.aes = list(fill="black")))+theme(axis.text.x = element_text(angle=90,vjust = 1,hjust = 1))+geom_boxplot()
#ggplotly(p)
plot(p)
# p<-ggplot(plotdata, aes(x=OTU2,y=Abundance,fill=Phylum,color=Phylum,alpha=Group)) +
# scale_y_log10() +
# scale_alpha_discrete(range=c(0,1), guide=guide_legend(override.aes = list(fill="black"))) +
# theme(axis.text.x = element_text(angle=90,vjust = 1,hjust = 1,size=12)) +
# geom_boxplot()+ geom_text(aes(y = 1000, label = paste0("p = ", format(pvalue)),angle=90))
# #ggplotly(p)
# plot(p)
# plotdata<-inner_join(da.otus,psmelt(data))%>%arrange(log2FoldChange)%>%filter(Group%in%c("Chow","HFPalm"))
# ggplot(plotdata,aes(x=Group,y=Abundance,fill=Phylum,color=Phylum,alpha=Group))+scale_y_log10()+scale_alpha_discrete(range=c(0,1),guide=guide_legend(override.aes = list(fill="black")))+facet_wrap(~Family)+scale_y_log10()+geom_boxplot()
#p<-ggplot(plotdata, aes(x=OTU2,y=Abundance,fill=Family,color=Family,alpha=Groupe)) +
# scale_y_log10() +
# scale_alpha_discrete(range=c(0,1), guide=guide_legend(override.aes = list(fill="black"))) +
# theme(axis.text.x = element_text(angle=90,vjust = 1,hjust = 1,size=12)) +
# geom_boxplot()+ geom_text(aes(y = 400, label = paste0("p = ", format(pvalue)),angle=90))
#ggplotly(p)
#plot(p)
plot_da_composite(da.otus, data, labvec = c("Chow" = "Chow", "HFPalm" = "HFP"), min.abundance = 50, lfc.threshold = 3)
Graphique sur lequel n’est affiché que la classification d’un OTU que si elle est plus précise que la famille (qui apparait déjà via la couleur)
plot_da_compositeter(da.otus, data, labvec = c("Chow" = "Chow", "HFPalm" = "HFP"), min.abundance = 50, lfc.threshold = 3)
#ggsave(p, file = "MMDaOTUsT90 HR_2HFDvsNAFLR_2HFD pval=0.001 min abundance=50 log2foldchange=5.png",width = 15, height = 7,dpi=300)
format_names <- function(physeq, min.rank = 0) {
tax <- as(tax_table(physeq), "matrix")
tax[tax %in% c("Unknown", "Multi-Affiliation")] <- NA
final_rank <- function(x) {
x <- x[!is.na(x)]
if (length(x) <= min.rank) {
return("")
} else {
tail(x, 1)
}
}
apply(tax, 1, final_rank)
}
## Function pour le graphique composite
plot_da_compositebis <- function(da.otus, physeq, labvec = NULL, lfc.threshold = 3, min.abundance = 50) {
## Custome theme #on peut mettre 3 ou 2
custom.theme <- theme_bw() +
theme(panel.spacing = unit(0.2, "lines"),
axis.title.y = element_blank(),
axis.ticks.y = element_blank(),
axis.text.y = element_blank(),
strip.text.y = element_blank(),
axis.text.x = element_text(size = 11)) #taille noms groupes heatmap
## Plotdata
plotdata <- build_plotdata_daotus(da.otus, physeq, min.abundance = min.abundance) %>%
inner_join(tax_table(data) %>% as("matrix") %>% as.data.frame() %>%
mutate(OTU = taxa_names(data),
OTU.label = format_names(data)),
by = "OTU") %>%
filter(abs(log2FoldChange) >= 3) %>% #On peut changer la valeur du log2FoldChange =3 ou 2
arrange(log2FoldChange)
##### Correct OTU and Family Order Ajout Mahendra
otu.order <- da.otus %>% pull(OTU)
fam.order <- da.otus %>% select(Phylum, Family) %>% unique() %>% arrange(Phylum, Family) %>% pull(Family)
plotdata <- plotdata %>%
## Ce que tu veux rajouter
inner_join(tax_table(data) %>% as("matrix") %>% as.data.frame() %>%
mutate(OTU = taxa_names(data),
OTU.label = format_names(data),
OTU.label.2 = if_else(Genus != "Unknown", Genus, NA_character_)), #format_names(data, min.rank = 4) #Pour changer les noms à côté des points
by = "OTU") %>%
## Tri des OTUs par log fold change décroissant et des familles par ordre alphabétique au sein des phylums
mutate(OTU = factor(OTU, levels = otu.order),
Family = factor(Family, levels = fam.order))
## Plots
#family.palette<- c("Atopobiaceae" = "#DE1906",
# "Bacteroidaceae" = "#217AD4",
# "Rikenellaceae" = "#1BA728",
# "Tannerellaceae" = "#7C3E9D",
# "Lachnospiraceae" = "#E89018",
# "Ruminococcaceae" = "#905B03",
# "Desulfovibrionaceae" = "#F897CF"
# )
br <- seq(-10, 10, by = 2)
p.lfc <- ggplot(plotdata, aes(y = OTU, x = 2^log2FoldChange, color = Family)) +
geom_point(size=5) + geom_vline(xintercept = 1, linetype = 2) + #size=1 taille point et noms family
geom_text(aes(x = 2^(log2FoldChange - .25*sign(log2FoldChange)),
label = OTU.label.2, vjust=0.5, #noms family à côté des points en pas juste en dessous
hjust = if_else(log2FoldChange > 0, 1, 0))) +
scale_color_brewer(palette="Paired") + # scale_color_manual(values = family.palette) si plus de 10 Family
scale_x_continuous(trans = "log2", breaks = 2^br,
labels = c(paste0("1/", 2^-br[br <0]), 2^br[br>= 0]),
name = "Fold change (log scale)") +
scale_y_discrete(position = "right")
#Heatmap
hm.data <- plotdata %>% group_by_at(vars(-Sample, -Name, -Abundance)) %>%
summarize(Abundance = mean(Abundance)) %>%
ungroup() %>%
mutate(OTU = factor(OTU, levels = otu.order),
Family = factor(Family, levels =fam.order),
Group = factor(Group, levels = names(labvec), labels = labvec))
p.hm <- ggplot(hm.data, aes(y = OTU, x = Group, fill = Abundance)) + geom_tile() +
scale_fill_gradient(trans = log_trans(base = 2), na.value = "white",
low = "white", high = "black")
#
p2 <- arrangeGrob(p.lfc + custom.theme + guides(colour = guide_legend(override.aes = list(size=5)))+ #taille points de la legende
theme(legend.position = c(0.025, 0.975),
legend.justification = c(0, 1),
legend.text = element_text(size = 16), #taille noms legende
legend.background = element_rect(fill = "transparent"),
axis.text.y = element_text(size = 9, hjust = 0.5), #Affichage noms des clusters,
axis.text.x = element_text(size = 11)),
#taille noms legende axe des x
p.hm + custom.theme, #p.hm défini la heatmap
widths = c(0.83, 0.17), #largeur heatmap
nrow = 1,
ncol = 2)
grid::grid.newpage()
grid::grid.draw(p2)
invisible(p2)
}
p<-plot_da_compositebis(da.otus, data, labvec = c("Chow" = "Chow", "HFPalm" = "HFP"), min.abundance = 50, lfc.threshold = 3)
#ggsave(p, file = "Plot Comoposite H vs NAFL.png",width = 13, height = 6,dpi=300)